lockdep: Print a nicer description for irq lock inversions

Locking order inversion due to interrupts is a subtle problem. When an irq lockiinversion discovered by lockdep it currently reports something like: [ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ] ... and then prints out the locks that are involved, as back traces. Judging by lkml feedback developers were routinely confused by what a HARDIRQ->safe to unsafe issue is all about, and sometimes even blew it off as a bug in lockdep. It is not obvious when lockdep prints this message about a lock that is never taken in interrupt context. After explaining the problems that lockdep is reporting, I decided to add a description of the problem in visual form. Now the following is shown: --- other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(lockA); local_irq_disable(); lock(&rq->lock); lock(lockA); <Interrupt> lock(&rq->lock); *** DEADLOCK *** --- The above is the case when the unsafe lock is taken while holding a lock taken in irq context. But when a lock is taken that also grabs a unsafe lock, the call chain is shown: --- other info that might help us debug this: Chain exists of: &rq->lock --> lockA --> lockC Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(lockC); local_irq_disable(); lock(&rq->lock); lock(lockA); <Interrupt> lock(&rq->lock); *** DEADLOCK *** Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/20110421014259.132728798@goodmis.org Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Steven Rostedt <srostedt@redhat.com> 2011-04-20 21:41:54 -0400
committer: Ingo Molnar <mingo@elte.hu> 2011-04-22 05:06:57 -0400
commit: 3003eba313dd0e0502dd71548c36fe7c19801ce5 (patch)
tree: 2b23fc90378bbdfe77e0446771ee4f75eef35a29 /kernel/lockdep.c
parent: 91e8549bde9e5cc88c5a2e8c8114389279e240b5 (diff)
1 files changed, 70 insertions, 0 deletions
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 53a68956f131..7b2ffeedcebd 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -490,6 +490,18 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
        usage[i] = '\0';
 }
+static int __print_lock_name(struct lock_class *class)
+{
+        char str[KSYM_NAME_LEN];
+        const char *name;
+        name = class->name;
+        if (!name)
+                name = __get_key_name(class->key, str);
+        return printk("%s", name);
+}
 static void print_lock_name(struct lock_class *class)
 {
        char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
@@ -1325,6 +1337,62 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
        return;
 }
+static void
+print_irq_lock_scenario(struct lock_list *safe_entry,
+                        struct lock_list *unsafe_entry,
+                        struct held_lock *prev,
+                        struct held_lock *next)
+{
+        struct lock_class *safe_class = safe_entry->class;
+        struct lock_class *unsafe_class = unsafe_entry->class;
+        struct lock_class *middle_class = hlock_class(prev);
+        if (middle_class == safe_class)
+                middle_class = hlock_class(next);
+        /*
+         * A direct locking problem where unsafe_class lock is taken
+         * directly by safe_class lock, then all we need to show
+         * is the deadlock scenario, as it is obvious that the
+         * unsafe lock is taken under the safe lock.
+         *
+         * But if there is a chain instead, where the safe lock takes
+         * an intermediate lock (middle_class) where this lock is
+         * not the same as the safe lock, then the lock chain is
+         * used to describe the problem. Otherwise we would need
+         * to show a different CPU case for each link in the chain
+         * from the safe_class lock to the unsafe_class lock.
+         */
+        if (middle_class != unsafe_class) {
+                printk("Chain exists of:\n  ");
+                __print_lock_name(safe_class);
+                printk(" --> ");
+                __print_lock_name(middle_class);
+                printk(" --> ");
+                __print_lock_name(unsafe_class);
+                printk("\n\n");
+        }
+        printk(" Possible interrupt unsafe locking scenario:\n\n");
+        printk("       CPU0                    CPU1\n");
+        printk("       ----                    ----\n");
+        printk("  lock(");
+        __print_lock_name(unsafe_class);
+        printk(");\n");
+        printk("                               local_irq_disable();\n");
+        printk("                               lock(");
+        __print_lock_name(safe_class);
+        printk(");\n");
+        printk("                               lock(");
+        __print_lock_name(middle_class);
+        printk(");\n");
+        printk("  <Interrupt>\n");
+        printk("    lock(");
+        __print_lock_name(safe_class);
+        printk(");\n");
+        printk("\n *** DEADLOCK ***\n\n");
+}
 static int
 print_bad_irq_dependency(struct task_struct *curr,
                         struct lock_list *prev_root,
@@ -1376,6 +1444,8 @@ print_bad_irq_dependency(struct task_struct *curr,
        print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
        printk("\nother info that might help us debug this:\n\n");
+        print_irq_lock_scenario(backwards_entry, forwards_entry, prev, next);
        lockdep_print_held_locks(curr);
        printk("\nthe dependencies between %s-irq-safe lock", irqclass);
author	Steven Rostedt <srostedt@redhat.com>	2011-04-20 21:41:54 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-04-22 05:06:57 -0400
commit	3003eba313dd0e0502dd71548c36fe7c19801ce5 (patch)
tree	2b23fc90378bbdfe77e0446771ee4f75eef35a29 /kernel/lockdep.c
parent	91e8549bde9e5cc88c5a2e8c8114389279e240b5 (diff)

diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 53a68956f131..7b2ffeedcebd 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c
@@ -490,6 +490,18 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
490	usage[i] = '\0';	490	usage[i] = '\0';
491	}	491	}
492		492
		493	static int __print_lock_name(struct lock_class *class)
		494	{
		495	char str[KSYM_NAME_LEN];
		496	const char *name;
		497
		498	name = class->name;
		499	if (!name)
		500	name = __get_key_name(class->key, str);
		501
		502	return printk("%s", name);
		503	}
		504
493	static void print_lock_name(struct lock_class *class)	505	static void print_lock_name(struct lock_class *class)
494	{	506	{
495	char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];	507	char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
@@ -1325,6 +1337,62 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
1325	return;	1337	return;
1326	}	1338	}
1327		1339
		1340	static void
		1341	print_irq_lock_scenario(struct lock_list *safe_entry,
		1342	struct lock_list *unsafe_entry,
		1343	struct held_lock *prev,
		1344	struct held_lock *next)
		1345	{
		1346	struct lock_class *safe_class = safe_entry->class;
		1347	struct lock_class *unsafe_class = unsafe_entry->class;
		1348	struct lock_class *middle_class = hlock_class(prev);
		1349
		1350	if (middle_class == safe_class)
		1351	middle_class = hlock_class(next);
		1352
		1353	/*
		1354	* A direct locking problem where unsafe_class lock is taken
		1355	* directly by safe_class lock, then all we need to show
		1356	* is the deadlock scenario, as it is obvious that the
		1357	* unsafe lock is taken under the safe lock.
		1358	*
		1359	* But if there is a chain instead, where the safe lock takes
		1360	* an intermediate lock (middle_class) where this lock is
		1361	* not the same as the safe lock, then the lock chain is
		1362	* used to describe the problem. Otherwise we would need
		1363	* to show a different CPU case for each link in the chain
		1364	* from the safe_class lock to the unsafe_class lock.
		1365	*/
		1366	if (middle_class != unsafe_class) {
		1367	printk("Chain exists of:\n ");
		1368	__print_lock_name(safe_class);
		1369	printk(" --> ");
		1370	__print_lock_name(middle_class);
		1371	printk(" --> ");
		1372	__print_lock_name(unsafe_class);
		1373	printk("\n\n");
		1374	}
		1375
		1376	printk(" Possible interrupt unsafe locking scenario:\n\n");
		1377	printk(" CPU0 CPU1\n");
		1378	printk(" ---- ----\n");
		1379	printk(" lock(");
		1380	__print_lock_name(unsafe_class);
		1381	printk(");\n");
		1382	printk(" local_irq_disable();\n");
		1383	printk(" lock(");
		1384	__print_lock_name(safe_class);
		1385	printk(");\n");
		1386	printk(" lock(");
		1387	__print_lock_name(middle_class);
		1388	printk(");\n");
		1389	printk(" <Interrupt>\n");
		1390	printk(" lock(");
		1391	__print_lock_name(safe_class);
		1392	printk(");\n");
		1393	printk("\n * DEADLOCK *\n\n");
		1394	}
		1395
1328	static int	1396	static int
1329	print_bad_irq_dependency(struct task_struct *curr,	1397	print_bad_irq_dependency(struct task_struct *curr,
1330	struct lock_list *prev_root,	1398	struct lock_list *prev_root,
@@ -1376,6 +1444,8 @@ print_bad_irq_dependency(struct task_struct *curr,
1376	print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);	1444	print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
1377		1445
1378	printk("\nother info that might help us debug this:\n\n");	1446	printk("\nother info that might help us debug this:\n\n");
		1447	print_irq_lock_scenario(backwards_entry, forwards_entry, prev, next);
		1448
1379	lockdep_print_held_locks(curr);	1449	lockdep_print_held_locks(curr);
1380		1450
1381	printk("\nthe dependencies between %s-irq-safe lock", irqclass);	1451	printk("\nthe dependencies between %s-irq-safe lock", irqclass);