aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2011-04-20 21:41:54 -0400
committerIngo Molnar <mingo@elte.hu>2011-04-22 05:06:57 -0400
commit3003eba313dd0e0502dd71548c36fe7c19801ce5 (patch)
tree2b23fc90378bbdfe77e0446771ee4f75eef35a29
parent91e8549bde9e5cc88c5a2e8c8114389279e240b5 (diff)
lockdep: Print a nicer description for irq lock inversions
Locking order inversion due to interrupts is a subtle problem. When an irq lockiinversion discovered by lockdep it currently reports something like: [ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ] ... and then prints out the locks that are involved, as back traces. Judging by lkml feedback developers were routinely confused by what a HARDIRQ->safe to unsafe issue is all about, and sometimes even blew it off as a bug in lockdep. It is not obvious when lockdep prints this message about a lock that is never taken in interrupt context. After explaining the problems that lockdep is reporting, I decided to add a description of the problem in visual form. Now the following is shown: --- other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(lockA); local_irq_disable(); lock(&rq->lock); lock(lockA); <Interrupt> lock(&rq->lock); *** DEADLOCK *** --- The above is the case when the unsafe lock is taken while holding a lock taken in irq context. But when a lock is taken that also grabs a unsafe lock, the call chain is shown: --- other info that might help us debug this: Chain exists of: &rq->lock --> lockA --> lockC Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(lockC); local_irq_disable(); lock(&rq->lock); lock(lockA); <Interrupt> lock(&rq->lock); *** DEADLOCK *** Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/20110421014259.132728798@goodmis.org Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--kernel/lockdep.c70
1 files changed, 70 insertions, 0 deletions
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 53a68956f131..7b2ffeedcebd 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -490,6 +490,18 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
490 usage[i] = '\0'; 490 usage[i] = '\0';
491} 491}
492 492
493static int __print_lock_name(struct lock_class *class)
494{
495 char str[KSYM_NAME_LEN];
496 const char *name;
497
498 name = class->name;
499 if (!name)
500 name = __get_key_name(class->key, str);
501
502 return printk("%s", name);
503}
504
493static void print_lock_name(struct lock_class *class) 505static void print_lock_name(struct lock_class *class)
494{ 506{
495 char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS]; 507 char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
@@ -1325,6 +1337,62 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
1325 return; 1337 return;
1326} 1338}
1327 1339
1340static void
1341print_irq_lock_scenario(struct lock_list *safe_entry,
1342 struct lock_list *unsafe_entry,
1343 struct held_lock *prev,
1344 struct held_lock *next)
1345{
1346 struct lock_class *safe_class = safe_entry->class;
1347 struct lock_class *unsafe_class = unsafe_entry->class;
1348 struct lock_class *middle_class = hlock_class(prev);
1349
1350 if (middle_class == safe_class)
1351 middle_class = hlock_class(next);
1352
1353 /*
1354 * A direct locking problem where unsafe_class lock is taken
1355 * directly by safe_class lock, then all we need to show
1356 * is the deadlock scenario, as it is obvious that the
1357 * unsafe lock is taken under the safe lock.
1358 *
1359 * But if there is a chain instead, where the safe lock takes
1360 * an intermediate lock (middle_class) where this lock is
1361 * not the same as the safe lock, then the lock chain is
1362 * used to describe the problem. Otherwise we would need
1363 * to show a different CPU case for each link in the chain
1364 * from the safe_class lock to the unsafe_class lock.
1365 */
1366 if (middle_class != unsafe_class) {
1367 printk("Chain exists of:\n ");
1368 __print_lock_name(safe_class);
1369 printk(" --> ");
1370 __print_lock_name(middle_class);
1371 printk(" --> ");
1372 __print_lock_name(unsafe_class);
1373 printk("\n\n");
1374 }
1375
1376 printk(" Possible interrupt unsafe locking scenario:\n\n");
1377 printk(" CPU0 CPU1\n");
1378 printk(" ---- ----\n");
1379 printk(" lock(");
1380 __print_lock_name(unsafe_class);
1381 printk(");\n");
1382 printk(" local_irq_disable();\n");
1383 printk(" lock(");
1384 __print_lock_name(safe_class);
1385 printk(");\n");
1386 printk(" lock(");
1387 __print_lock_name(middle_class);
1388 printk(");\n");
1389 printk(" <Interrupt>\n");
1390 printk(" lock(");
1391 __print_lock_name(safe_class);
1392 printk(");\n");
1393 printk("\n *** DEADLOCK ***\n\n");
1394}
1395
1328static int 1396static int
1329print_bad_irq_dependency(struct task_struct *curr, 1397print_bad_irq_dependency(struct task_struct *curr,
1330 struct lock_list *prev_root, 1398 struct lock_list *prev_root,
@@ -1376,6 +1444,8 @@ print_bad_irq_dependency(struct task_struct *curr,
1376 print_stack_trace(forwards_entry->class->usage_traces + bit2, 1); 1444 print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
1377 1445
1378 printk("\nother info that might help us debug this:\n\n"); 1446 printk("\nother info that might help us debug this:\n\n");
1447 print_irq_lock_scenario(backwards_entry, forwards_entry, prev, next);
1448
1379 lockdep_print_held_locks(curr); 1449 lockdep_print_held_locks(curr);
1380 1450
1381 printk("\nthe dependencies between %s-irq-safe lock", irqclass); 1451 printk("\nthe dependencies between %s-irq-safe lock", irqclass);