aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHeiko Carstens <heiko.carstens@de.ibm.com>2007-03-05 03:30:51 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-03-05 10:57:53 -0500
commite81ce1f7ecdaed2844c75313b09af791d44e6373 (patch)
tree49e70223f9ca808c6c7fed5cf7ce00125ccca84a
parent6bb74df481223731af6c7e0ff3adb31f6442cfcd (diff)
[PATCH] timer/hrtimer: take per cpu locks in sane order
Doing something like this on a two cpu system # echo 0 > /sys/devices/system/cpu/cpu0/online # echo 1 > /sys/devices/system/cpu/cpu0/online # echo 0 > /sys/devices/system/cpu/cpu1/online will give me this: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.21-rc2-g562aa1d4-dirty #7 ------------------------------------------------------- bash/1282 is trying to acquire lock: (&cpu_base->lock_key){.+..}, at: [<000000000005f17e>] hrtimer_cpu_notify+0xc6/0x240 but task is already holding lock: (&cpu_base->lock_key#2){.+..}, at: [<000000000005f174>] hrtimer_cpu_notify+0xbc/0x240 which lock already depends on the new lock. This happens because we have the following code in kernel/hrtimer.c: migrate_hrtimers(int cpu) [...] old_base = &per_cpu(hrtimer_bases, cpu); new_base = &get_cpu_var(hrtimer_bases); [...] spin_lock(&new_base->lock); spin_lock(&old_base->lock); Which means the spinlocks are taken in an order which depends on which cpu gets shut down from which other cpu. Therefore lockdep complains that there might be an ABBA deadlock. Since migrate_hrtimers() gets only called on cpu hotplug it's safe to assume that it isn't executed concurrently on a The same problem exists in kernel/timer.c: migrate_timers(). As pointed out by Christian Borntraeger one possible solution to avoid the locking order complaints would be to make sure that the locks are always taken in the same order. E.g. by taking the lock of the cpu with the lower number first. To achieve this we introduce two new spinlock functions double_spin_lock and double_spin_unlock which lock or unlock two locks in a given order. Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: John Stultz <johnstul@us.ibm.com> Cc: Christian Borntraeger <cborntra@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/spinlock.h37
-rw-r--r--kernel/hrtimer.c9
-rw-r--r--kernel/timer.c8
3 files changed, 45 insertions, 9 deletions
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 61fef376ed2e..a946176db638 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -283,6 +283,43 @@ do { \
283}) 283})
284 284
285/* 285/*
286 * Locks two spinlocks l1 and l2.
287 * l1_first indicates if spinlock l1 should be taken first.
288 */
289static inline void double_spin_lock(spinlock_t *l1, spinlock_t *l2,
290 bool l1_first)
291 __acquires(l1)
292 __acquires(l2)
293{
294 if (l1_first) {
295 spin_lock(l1);
296 spin_lock(l2);
297 } else {
298 spin_lock(l2);
299 spin_lock(l1);
300 }
301}
302
303/*
304 * Unlocks two spinlocks l1 and l2.
305 * l1_taken_first indicates if spinlock l1 was taken first and therefore
306 * should be released after spinlock l2.
307 */
308static inline void double_spin_unlock(spinlock_t *l1, spinlock_t *l2,
309 bool l1_taken_first)
310 __releases(l1)
311 __releases(l2)
312{
313 if (l1_taken_first) {
314 spin_unlock(l2);
315 spin_unlock(l1);
316 } else {
317 spin_unlock(l1);
318 spin_unlock(l2);
319 }
320}
321
322/*
286 * Pull the atomic_t declaration: 323 * Pull the atomic_t declaration:
287 * (asm-mips/atomic.h needs above definitions) 324 * (asm-mips/atomic.h needs above definitions)
288 */ 325 */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 476cb0c0b4a4..de93a8176ca6 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1355,17 +1355,16 @@ static void migrate_hrtimers(int cpu)
1355 tick_cancel_sched_timer(cpu); 1355 tick_cancel_sched_timer(cpu);
1356 1356
1357 local_irq_disable(); 1357 local_irq_disable();
1358 1358 double_spin_lock(&new_base->lock, &old_base->lock,
1359 spin_lock(&new_base->lock); 1359 smp_processor_id() < cpu);
1360 spin_lock(&old_base->lock);
1361 1360
1362 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 1361 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1363 migrate_hrtimer_list(&old_base->clock_base[i], 1362 migrate_hrtimer_list(&old_base->clock_base[i],
1364 &new_base->clock_base[i]); 1363 &new_base->clock_base[i]);
1365 } 1364 }
1366 spin_unlock(&old_base->lock);
1367 spin_unlock(&new_base->lock);
1368 1365
1366 double_spin_unlock(&new_base->lock, &old_base->lock,
1367 smp_processor_id() < cpu);
1369 local_irq_enable(); 1368 local_irq_enable();
1370 put_cpu_var(hrtimer_bases); 1369 put_cpu_var(hrtimer_bases);
1371} 1370}
diff --git a/kernel/timer.c b/kernel/timer.c
index 6663a87f7304..8ad384253ef2 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1651,8 +1651,8 @@ static void __devinit migrate_timers(int cpu)
1651 new_base = get_cpu_var(tvec_bases); 1651 new_base = get_cpu_var(tvec_bases);
1652 1652
1653 local_irq_disable(); 1653 local_irq_disable();
1654 spin_lock(&new_base->lock); 1654 double_spin_lock(&new_base->lock, &old_base->lock,
1655 spin_lock(&old_base->lock); 1655 smp_processor_id() < cpu);
1656 1656
1657 BUG_ON(old_base->running_timer); 1657 BUG_ON(old_base->running_timer);
1658 1658
@@ -1665,8 +1665,8 @@ static void __devinit migrate_timers(int cpu)
1665 migrate_timer_list(new_base, old_base->tv5.vec + i); 1665 migrate_timer_list(new_base, old_base->tv5.vec + i);
1666 } 1666 }
1667 1667
1668 spin_unlock(&old_base->lock); 1668 double_spin_unlock(&new_base->lock, &old_base->lock,
1669 spin_unlock(&new_base->lock); 1669 smp_processor_id() < cpu);
1670 local_irq_enable(); 1670 local_irq_enable();
1671 put_cpu_var(tvec_bases); 1671 put_cpu_var(tvec_bases);
1672} 1672}