author	Anton Blanchard <anton@samba.org>	2006-03-23 05:59:20 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-03-23 10:38:02 -0500
commit	e9028b0ff2bad1954568604dc17725692c8524d6 (patch)
tree	dbff742d39520574e5985930333f1d421282e080 /kernel/sched.c
parent	5be0e9511990dc307670dc66a42073db96b20f26 (diff)
[PATCH] fix scheduler deadlock
We have noticed lockups during boot when stress testing kexec on ppc64.
Two cpus would deadlock in scheduler code trying to grab already taken
spinlocks.

The double_rq_lock code uses the address of the runqueue to order the
taking of multiple locks.  This address is a per cpu variable:

	if (rq1 < rq2) {
		spin_lock(&rq1->lock);
		spin_lock(&rq2->lock);
	} else {
		spin_lock(&rq2->lock);
		spin_lock(&rq1->lock);
	}

On the other hand, the code in wake_sleeping_dependent uses the cpu id
order to grab locks:

	for_each_cpu_mask(i, sibling_map)
		spin_lock(&cpu_rq(i)->lock);

This means we rely on the address of per cpu data increasing as cpu ids
increase.  While this will be true for the generic percpu implementation,
it may not be true for arch specific implementations.

One way to solve this is to always take runqueues in cpu id order.  To do
this we add a cpu variable to the runqueue and check it in the double
runqueue locking functions.

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
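
A minimal userspace sketch of the ordering mismatch described above (not
kernel code: the toy_rq struct, the cpu_rq[] table, and the deliberately
reversed layout are hypothetical, made up only for illustration).  It prints
which lock each discipline would take first; when the two answers differ,
one path can hold cpu 0's lock while waiting for cpu 1's and the other path
the reverse, which is the AB/BA deadlock this patch removes:

	#include <stdio.h>

	/* Hypothetical stand-in for a runqueue: only the cpu id matters here. */
	struct toy_rq {
		int cpu;
	};

	/*
	 * Lay the areas out so that cpu 1's runqueue sits at a LOWER address
	 * than cpu 0's, as an arch specific percpu allocator might do.
	 */
	static struct toy_rq storage[2] = { { .cpu = 1 }, { .cpu = 0 } };
	static struct toy_rq *cpu_rq[2] = { &storage[1], &storage[0] };

	/* Old double_rq_lock rule: order the locks by runqueue address. */
	static int first_by_address(struct toy_rq *a, struct toy_rq *b)
	{
		return (a < b) ? a->cpu : b->cpu;
	}

	/* wake_sleeping_dependent rule: order the locks by cpu id. */
	static int first_by_cpu_id(struct toy_rq *a, struct toy_rq *b)
	{
		return (a->cpu < b->cpu) ? a->cpu : b->cpu;
	}

	int main(void)
	{
		struct toy_rq *rq0 = cpu_rq[0], *rq1 = cpu_rq[1];

		printf("address order locks cpu %d first\n",
		       first_by_address(rq0, rq1));
		printf("cpu id  order locks cpu %d first\n",
		       first_by_cpu_id(rq0, rq1));
		return 0;
	}
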
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	9
1 file changed, 7 insertions, 2 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 6b6e0d70eb30..a5bd60453eae 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -237,6 +237,7 @@ struct runqueue {
 
 	task_t *migration_thread;
 	struct list_head migration_queue;
+	int cpu;
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
@@ -1654,6 +1655,9 @@ unsigned long nr_iowait(void)
 /*
  * double_rq_lock - safely lock two runqueues
  *
+ * We must take them in cpu order to match code in
+ * dependent_sleeper and wake_dependent_sleeper.
+ *
  * Note this does not disable interrupts like task_rq_lock,
  * you need to do so manually before calling.
  */
@@ -1665,7 +1669,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
 		spin_lock(&rq1->lock);
 		__acquire(rq2->lock);	/* Fake it out ;) */
 	} else {
-		if (rq1 < rq2) {
+		if (rq1->cpu < rq2->cpu) {
 			spin_lock(&rq1->lock);
 			spin_lock(&rq2->lock);
 		} else {
@@ -1701,7 +1705,7 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
 	__acquires(this_rq->lock)
 {
 	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
+		if (busiest->cpu < this_rq->cpu) {
 			spin_unlock(&this_rq->lock);
 			spin_lock(&busiest->lock);
 			spin_lock(&this_rq->lock);
@@ -6029,6 +6033,7 @@ void __init sched_init(void)
 		rq->push_cpu = 0;
 		rq->migration_thread = NULL;
 		INIT_LIST_HEAD(&rq->migration_queue);
+		rq->cpu = i;
 #endif
 		atomic_set(&rq->nr_iowait, 0);
 