author     Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>   2008-03-10 14:01:20 -0400
committer  Ingo Molnar <mingo@elte.hu>                     2008-03-14 22:02:49 -0400
commit     0e1f34833bd9170ccc93ab759e48e695917fa48f (patch)
tree       2ca4b89dd755f2d82dbd34466a8cd6d8b5b4a0af
parent     4faa8496650f9417189dacce8f933e8ec61dc032 (diff)
sched: fix race in schedule()
Fix a hard-to-trigger crash seen in the -rt kernel that also affects
the vanilla scheduler.
There is a race condition between schedule() and some dequeue/enqueue
functions: rt_mutex_setprio(), __setscheduler() and sched_move_task().
When scheduling to idle, idle_balance() is called to pull tasks from
another busy processor, and it may drop the rq lock. As a result, those
three functions can encounter a task with on_rq=0 and running=1, and
the current task must still be put while it is running.
Here is a possible scenario:

   CPU0                          CPU1
    |                        schedule()
    |                        ->deactivate_task()
    |                        ->idle_balance()
    |                        -->load_balance_newidle()
rt_mutex_setprio()               |
    |                        --->double_lock_balance()
   *get lock                    *rel lock
   * on_rq=0, running=1          |
   * sched_class is changed      |
   *rel lock                    *get lock
    :                            |
                                 :
                             ->put_prev_task_rt()
                             ->pick_next_task_fair()
                                 => panic
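
For reference, a condensed sketch of the pre-patch dequeue path shared
by the three functions above (taken from the old side of the hunks
below). put_prev_task() is reachable only from inside the on_rq branch,
so a task that has already been deactivated (on_rq=0) but is still
running is never put:

	/* pre-patch shape: put_prev_task() nested under on_rq */
	if (on_rq) {
		dequeue_task(rq, p, 0);
		if (running)
			p->sched_class->put_prev_task(rq, p);
	}
	/* with on_rq=0 and running=1 the whole block is skipped,
	 * leaving p's sched entity behind as cfs_rq->curr */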
CPU1's current process, P1, is inside schedule(). P1 has been
deactivated, and because CPU1 is about to go idle the scheduler looks
for a process to pull from another CPU's runqueue: idle_balance(),
load_balance_newidle() and double_lock_balance() are called, and
double_lock_balance() can drop the rq lock. Meanwhile, CPU0 is trying
to boost the priority of P1. Because P1 is off the runqueue, the boost
only changes P1's prio and sched_class to RT; the sched entities of P1
and P1's group are never put. This leaves the cfs_rq invalid (it has a
curr but no leaf entity), so when pick_next_task_fair() is called the
kernel panics.
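
The fix applied at each call site below is to decouple the two checks,
so a running task is always put before the change and reinstalled as
curr afterwards, whether or not it is on the runqueue. A condensed
sketch of the post-patch shape (the middle comment stands in for the
per-function modification):

	/* post-patch shape: "running" handled independently of "on_rq" */
	if (on_rq)
		dequeue_task(rq, p, 0);
	if (running)
		p->sched_class->put_prev_task(rq, p);

	/* ... change p's prio / sched_class / task group ... */

	if (running)
		p->sched_class->set_curr_task(rq);
	if (on_rq)
		enqueue_task(rq, p, 0);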
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  kernel/sched.c | 38 ++++++++++++++++----------------------
1 file changed, 16 insertions(+), 22 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cb53fb1fe3d..9df9ba73cb7a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4268,11 +4268,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	oldprio = p->prio;
 	on_rq = p->se.on_rq;
 	running = task_current(rq, p);
-	if (on_rq) {
+	if (on_rq)
 		dequeue_task(rq, p, 0);
-		if (running)
-			p->sched_class->put_prev_task(rq, p);
-	}
+	if (running)
+		p->sched_class->put_prev_task(rq, p);
 
 	if (rt_prio(prio))
 		p->sched_class = &rt_sched_class;
@@ -4281,10 +4280,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
 	p->prio = prio;
 
+	if (running)
+		p->sched_class->set_curr_task(rq);
 	if (on_rq) {
-		if (running)
-			p->sched_class->set_curr_task(rq);
-
 		enqueue_task(rq, p, 0);
 
 		check_class_changed(rq, p, prev_class, oldprio, running);
@@ -4581,19 +4579,17 @@ recheck:
 	update_rq_clock(rq);
 	on_rq = p->se.on_rq;
 	running = task_current(rq, p);
-	if (on_rq) {
+	if (on_rq)
 		deactivate_task(rq, p, 0);
-		if (running)
-			p->sched_class->put_prev_task(rq, p);
-	}
+	if (running)
+		p->sched_class->put_prev_task(rq, p);
 
 	oldprio = p->prio;
 	__setscheduler(rq, p, policy, param->sched_priority);
 
+	if (running)
+		p->sched_class->set_curr_task(rq);
 	if (on_rq) {
-		if (running)
-			p->sched_class->set_curr_task(rq);
-
 		activate_task(rq, p, 0);
 
 		check_class_changed(rq, p, prev_class, oldprio, running);
@@ -7618,11 +7614,10 @@ void sched_move_task(struct task_struct *tsk)
 	running = task_current(rq, tsk);
 	on_rq = tsk->se.on_rq;
 
-	if (on_rq) {
+	if (on_rq)
 		dequeue_task(rq, tsk, 0);
-		if (unlikely(running))
-			tsk->sched_class->put_prev_task(rq, tsk);
-	}
+	if (unlikely(running))
+		tsk->sched_class->put_prev_task(rq, tsk);
 
 	set_task_rq(tsk, task_cpu(tsk));
 
@@ -7631,11 +7626,10 @@ void sched_move_task(struct task_struct *tsk)
 		tsk->sched_class->moved_group(tsk);
 #endif
 
-	if (on_rq) {
-		if (unlikely(running))
-			tsk->sched_class->set_curr_task(rq);
+	if (unlikely(running))
+		tsk->sched_class->set_curr_task(rq);
+	if (on_rq)
 		enqueue_task(rq, tsk, 0);
-	}
 
 	task_rq_unlock(rq, &flags);
 }