From 96a2c464de07d7c72988db851c029b204fc59108 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 1 Aug 2009 01:34:24 +0200
Subject: tracing/bkl: Add bkl ftrace events

Add two events lock_kernel and unlock_kernel() to trace the bkl uses.
This opens the door for userspace tools to perform statistics about
the callsites that use it, dependencies with other locks (by pairing
the trace with lock events), use with recursivity and so on...

The {__reacquire,release}_kernel_lock() events are not traced because
these are called from schedule, thus the sched events are sufficient
to trace them.

Example of a trace:

hald-addon-stor-4152  [000]   165.875501: unlock_kernel: depth: 0, fs/block_dev.c:1358 __blkdev_put()
hald-addon-stor-4152  [000]   167.832974: lock_kernel: depth: 0, fs/block_dev.c:1167 __blkdev_get()

How to get the callsites that acquire it recursively:

cd /debug/tracing/events/bkl
echo "lock_depth > 0" > filter

firefox-4951  [001]   206.276967: unlock_kernel: depth: 1, fs/reiserfs/super.c:575 reiserfs_dirty_inode()

You can also filter by file and/or line.

v2: Use of FILTER_PTR_STRING attribute for files and lines fields to
    make them traceable.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
---
 lib/kernel_lock.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'lib/kernel_lock.c')

diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 39f1029e3525..5c10b2e1fd08 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -5,10 +5,11 @@
  * relegated to obsolescence, but used by various less
  * important (or lazy) subsystems.
  */
-#include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/semaphore.h>
+#define CREATE_TRACE_POINTS
+#include <linux/smp_lock.h>
 
 /*
  * The 'big kernel lock'
@@ -113,7 +114,7 @@ static inline void __unlock_kernel(void)
  * This cannot happen asynchronously, so we only need to
  * worry about other CPU's.
  */
-void __lockfunc lock_kernel(void)
+void __lockfunc _lock_kernel(void)
 {
 	int depth = current->lock_depth+1;
 	if (likely(!depth))
@@ -121,13 +122,13 @@ void __lockfunc lock_kernel(void)
 	current->lock_depth = depth;
 }
 
-void __lockfunc unlock_kernel(void)
+void __lockfunc _unlock_kernel(void)
 {
 	BUG_ON(current->lock_depth < 0);
 	if (likely(--current->lock_depth < 0))
 		__unlock_kernel();
 }
 
-EXPORT_SYMBOL(lock_kernel);
-EXPORT_SYMBOL(unlock_kernel);
+EXPORT_SYMBOL(_lock_kernel);
+EXPORT_SYMBOL(_unlock_kernel);
 
-- 
cgit v1.2.2


From 925936ebf35a95c290e010b784c962164e6728f3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 28 Sep 2009 17:12:49 +0200
Subject: tracing: Pushdown the bkl tracepoints calls

Currently we are calling the bkl tracepoint callbacks just before the
bkl lock/unlock operations, ie the tracepoint call is not inside a
lock_kernel() function but inside a lock_kernel() macro. Hence the
bkl trace event header must be included from smp_lock.h. This raises
some nasty circular header dependencies:

linux/smp_lock.h -> trace/events/bkl.h -> trace/define_trace.h
-> trace/ftrace.h -> linux/ftrace_event.h -> linux/hardirq.h
-> linux/smp_lock.h

This results in incomplete event declarations, spurious event
definitions and other kind of funny behaviours.

This is hardly fixable without ugly workarounds. So instead, we push
the file name, line number and function name as lock_kernel()
parameters, so that we only deal with the trace event header from
lib/kernel_lock.c

This adds two parameters to lock_kernel() and unlock_kernel() but
it should be fine wrt to performances because this pair dos not seem
to be called in fast paths.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Li Zefan <lizf@cn.fujitsu.com>
---
 lib/kernel_lock.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'lib/kernel_lock.c')

diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 5c10b2e1fd08..4ebfa5a164d7 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -8,9 +8,11 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/semaphore.h>
-#define CREATE_TRACE_POINTS
 #include <linux/smp_lock.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/bkl.h>
+
 /*
  * The 'big kernel lock'
  *
@@ -114,19 +116,24 @@ static inline void __unlock_kernel(void)
  * This cannot happen asynchronously, so we only need to
  * worry about other CPU's.
  */
-void __lockfunc _lock_kernel(void)
+void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 {
-	int depth = current->lock_depth+1;
+	int depth = current->lock_depth + 1;
+
+	trace_lock_kernel(func, file, line);
+
 	if (likely(!depth))
 		__lock_kernel();
 	current->lock_depth = depth;
 }
 
-void __lockfunc _unlock_kernel(void)
+void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
 {
 	BUG_ON(current->lock_depth < 0);
 	if (likely(--current->lock_depth < 0))
 		__unlock_kernel();
+
+	trace_unlock_kernel(func, file, line);
 }
 
 EXPORT_SYMBOL(_lock_kernel);
-- 
cgit v1.2.2


From f01eb3640308c005d31b29d0a8bc2b7acb4e3f75 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 12 Dec 2009 14:46:33 -0800
Subject: [BKL] add 'might_sleep()' to the outermost lock taker

As shown by the previous patch (6698e3472: "tty: Fix BKL taken under a
spinlock bug introduced in the BKL split") the BKL removal is prone to
some subtle issues, where removing the BKL in one place may in fact make
a previously nested BKL call the new outer call, and then prone to nasty
deadlocks with other spinlocks.

In general, we should never take the BKL while we're holding a spinlock,
so let's just add a "might_sleep()" to it (even though the BKL doesn't
technically sleep - at least not yet), and we'll get nice warnings the
next time this kind of problem happens during BKL removal.

Acked-and-Tested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/kernel_lock.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'lib/kernel_lock.c')

diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 4ebfa5a164d7..5526b46aba94 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -122,8 +122,10 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 
 	trace_lock_kernel(func, file, line);
 
-	if (likely(!depth))
+	if (likely(!depth)) {
+		might_sleep();
 		__lock_kernel();
+	}
 	current->lock_depth = depth;
 }
 
-- 
cgit v1.2.2


From 9828ea9d75c38fe3dce05d00566eed61c85732e6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 3 Dec 2009 20:55:53 +0100
Subject: locking: Further name space cleanups

The name space hierarchy for the internal lock functions is now a bit
backwards. raw_spin* functions map to _spin* which use __spin*, while
we would like to have _raw_spin* and __raw_spin*.

_raw_spin* is already used by lock debugging, so rename those funtions
to do_raw_spin* to free up the _raw_spin* name space.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 lib/kernel_lock.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'lib/kernel_lock.c')

diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 5526b46aba94..fdd23cdb53f3 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -36,12 +36,12 @@ static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);
  * If it successfully gets the lock, it should increment
  * the preemption count like any spinlock does.
  *
- * (This works on UP too - _raw_spin_trylock will never
+ * (This works on UP too - do_raw_spin_trylock will never
  * return false in that case)
  */
 int __lockfunc __reacquire_kernel_lock(void)
 {
-	while (!_raw_spin_trylock(&kernel_flag)) {
+	while (!do_raw_spin_trylock(&kernel_flag)) {
 		if (need_resched())
 			return -EAGAIN;
 		cpu_relax();
@@ -52,27 +52,27 @@ int __lockfunc __reacquire_kernel_lock(void)
 
 void __lockfunc __release_kernel_lock(void)
 {
-	_raw_spin_unlock(&kernel_flag);
+	do_raw_spin_unlock(&kernel_flag);
 	preempt_enable_no_resched();
 }
 
 /*
  * These are the BKL spinlocks - we try to be polite about preemption.
  * If SMP is not on (ie UP preemption), this all goes away because the
- * _raw_spin_trylock() will always succeed.
+ * do_raw_spin_trylock() will always succeed.
  */
 #ifdef CONFIG_PREEMPT
 static inline void __lock_kernel(void)
 {
 	preempt_disable();
-	if (unlikely(!_raw_spin_trylock(&kernel_flag))) {
+	if (unlikely(!do_raw_spin_trylock(&kernel_flag))) {
 		/*
 		 * If preemption was disabled even before this
 		 * was called, there's nothing we can be polite
 		 * about - just spin.
 		 */
 		if (preempt_count() > 1) {
-			_raw_spin_lock(&kernel_flag);
+			do_raw_spin_lock(&kernel_flag);
 			return;
 		}
 
@@ -85,7 +85,7 @@ static inline void __lock_kernel(void)
 			while (spin_is_locked(&kernel_flag))
 				cpu_relax();
 			preempt_disable();
-		} while (!_raw_spin_trylock(&kernel_flag));
+		} while (!do_raw_spin_trylock(&kernel_flag));
 	}
 }
 
@@ -96,7 +96,7 @@ static inline void __lock_kernel(void)
  */
 static inline void __lock_kernel(void)
 {
-	_raw_spin_lock(&kernel_flag);
+	do_raw_spin_lock(&kernel_flag);
 }
 #endif
 
@@ -106,7 +106,7 @@ static inline void __unlock_kernel(void)
 	 * the BKL is not covered by lockdep, so we open-code the
 	 * unlocking sequence (and thus avoid the dep-chain ops):
 	 */
-	_raw_spin_unlock(&kernel_flag);
+	do_raw_spin_unlock(&kernel_flag);
 	preempt_enable();
 }
 
-- 
cgit v1.2.2


From fa4062e7eae8f484c90b9cdd850b5df39ab0e5a0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Nov 2009 14:45:06 +0100
Subject: bkl: Fixup core_lock fallout

kernel_lock.c emits a warning because a raw spinlock function is used
with a spinlock. Convert BKL to raw_spinlock.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 lib/kernel_lock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/kernel_lock.c')

diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index fdd23cdb53f3..b135d04aa48a 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -23,7 +23,7 @@
  *
  * Don't use in new code.
  */
-static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);
+static  __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag);
 
 
 /*
@@ -82,7 +82,7 @@ static inline void __lock_kernel(void)
 		 */
 		do {
 			preempt_enable();
-			while (spin_is_locked(&kernel_flag))
+			while (raw_spin_is_locked(&kernel_flag))
 				cpu_relax();
 			preempt_disable();
 		} while (!do_raw_spin_trylock(&kernel_flag));
-- 
cgit v1.2.2