author     Ingo Molnar <mingo@elte.hu>    2009-03-05 04:20:47 -0500
committer  Ingo Molnar <mingo@elte.hu>    2009-03-05 04:20:47 -0500
commit     c4ef144a9d0803eb0a2d4110ae87e7f34e667ded (patch)
tree       b4b5e472bafb3d5d0d8ea26680e1d8cc87365c30
parent     a1be621dfacbef0fd374d8acd553d71e07bf29ac (diff)
parent     efed792d6738964f399a508ef9e831cd60fa4657 (diff)
Merge branch 'tracing/ftrace' into tracing/core
-rw-r--r--  Documentation/lockdep-design.txt     |  30
-rw-r--r--  include/linux/lockdep.h              |  50
-rw-r--r--  include/linux/mutex.h                |   5
-rw-r--r--  include/linux/ring_buffer.h          |   7
-rw-r--r--  include/linux/sched.h                |   3
-rw-r--r--  include/linux/timer.h                |  93
-rw-r--r--  include/trace/lockdep.h              |   9
-rw-r--r--  include/trace/lockdep_event_types.h  |  44
-rw-r--r--  include/trace/trace_event_types.h    |   1
-rw-r--r--  include/trace/trace_events.h         |   1
-rw-r--r--  kernel/lockdep.c                     | 531
-rw-r--r--  kernel/lockdep_internals.h           |  45
-rw-r--r--  kernel/lockdep_proc.c                |  22
-rw-r--r--  kernel/lockdep_states.h              |   9
-rw-r--r--  kernel/mutex-debug.c                 |   9
-rw-r--r--  kernel/mutex-debug.h                 |  18
-rw-r--r--  kernel/mutex.c                       | 121
-rw-r--r--  kernel/mutex.h                       |  22
-rw-r--r--  kernel/sched.c                       |  71
-rw-r--r--  kernel/sched_features.h              |   1
-rw-r--r--  kernel/timer.c                       |  68
-rw-r--r--  kernel/trace/ring_buffer.c           | 118
-rw-r--r--  kernel/trace/trace.c                 | 288
-rw-r--r--  kernel/trace/trace.h                 |   1
-rw-r--r--  kernel/trace/trace_events_stage_3.h  |   4
-rw-r--r--  lib/Kconfig.debug                    |   2
-rw-r--r--  mm/page_alloc.c                      |   5
-rw-r--r--  mm/slab.c                            |   4
-rw-r--r--  mm/slob.c                            |   2
-rw-r--r--  mm/slub.c                            |   1
-rw-r--r--  mm/vmscan.c                          |   2
31 files changed, 1155 insertions(+), 432 deletions(-)
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
index 48877301815..938ea22f2cc 100644
--- a/Documentation/lockdep-design.txt
+++ b/Documentation/lockdep-design.txt
@@ -27,33 +27,37 @@ lock-class.
 State
 -----
 
-The validator tracks lock-class usage history into 5 separate state bits:
+The validator tracks lock-class usage history into 4n + 1 separate state bits:
 
-- 'ever held in hardirq context' [ == hardirq-safe ]
-- 'ever held in softirq context' [ == softirq-safe ]
-- 'ever held with hardirqs enabled' [ == hardirq-unsafe ]
-- 'ever held with softirqs and hardirqs enabled' [ == softirq-unsafe ]
+- 'ever held in STATE context'
+- 'ever held as readlock in STATE context'
+- 'ever held with STATE enabled'
+- 'ever held as readlock with STATE enabled'
+
+Where STATE can be either one of (kernel/lockdep_states.h)
+ - hardirq
+ - softirq
+ - reclaim_fs
 
 - 'ever used' [ == !unused ]
 
-When locking rules are violated, these 4 state bits are presented in the
+When locking rules are violated, these state bits are presented in the
 locking error messages, inside curlies. A contrived example:
 
    modprobe/2287 is trying to acquire lock:
-    (&sio_locks[i].lock){--..}, at: [<c02867fd>] mutex_lock+0x21/0x24
+    (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
 
    but task is already holding lock:
-    (&sio_locks[i].lock){--..}, at: [<c02867fd>] mutex_lock+0x21/0x24
+    (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
 
 
-The bit position indicates hardirq, softirq, hardirq-read,
-softirq-read respectively, and the character displayed in each
-indicates:
+The bit position indicates STATE, STATE-read, for each of the states listed
+above, and the character displayed in each indicates:
 
    '.'  acquired while irqs disabled
    '+'  acquired in irq context
    '-'  acquired with irqs enabled
-   '?'  read acquired in irq context with irqs enabled.
+   '?'  acquired in irq context with irqs enabled.
 
 Unused mutexes cannot be part of the cause of an error.
 
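For reference, a minimal userspace sketch (not part of the patch) that decodes a reported usage string such as {-.-...} under the new layout: two characters per state, write then read, with the states in kernel/lockdep_states.h order. The character meanings come straight from the text above.

#include <stdio.h>

/* States in the order generated from kernel/lockdep_states.h */
static const char *states[] = { "hardirq", "softirq", "reclaim_fs" };

static const char *meaning(char c)
{
	switch (c) {
	case '.': return "acquired while irqs disabled";
	case '+': return "acquired in irq context";
	case '-': return "acquired with irqs enabled";
	case '?': return "acquired in irq context with irqs enabled";
	default:  return "unknown";
	}
}

int main(void)
{
	const char usage[] = "-.-..."; /* the {-.-...} example above */
	int i;

	for (i = 0; i < 3; i++) {
		printf("%-10s W: %s\n", states[i], meaning(usage[2 * i]));
		printf("%-10s R: %s\n", states[i], meaning(usage[2 * i + 1]));
	}
	return 0;
}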
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 23bf02fb124..5a58ea3e91e 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -20,43 +20,10 @@ struct lockdep_map;
20#include <linux/stacktrace.h> 20#include <linux/stacktrace.h>
21 21
22/* 22/*
23 * Lock-class usage-state bits: 23 * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
24 * the total number of states... :-(
24 */ 25 */
25enum lock_usage_bit 26#define XXX_LOCK_USAGE_STATES (1+3*4)
26{
27 LOCK_USED = 0,
28 LOCK_USED_IN_HARDIRQ,
29 LOCK_USED_IN_SOFTIRQ,
30 LOCK_ENABLED_SOFTIRQS,
31 LOCK_ENABLED_HARDIRQS,
32 LOCK_USED_IN_HARDIRQ_READ,
33 LOCK_USED_IN_SOFTIRQ_READ,
34 LOCK_ENABLED_SOFTIRQS_READ,
35 LOCK_ENABLED_HARDIRQS_READ,
36 LOCK_USAGE_STATES
37};
38
39/*
40 * Usage-state bitmasks:
41 */
42#define LOCKF_USED (1 << LOCK_USED)
43#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ)
44#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ)
45#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS)
46#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS)
47
48#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS)
49#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
50
51#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ)
52#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ)
53#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ)
54#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ)
55
56#define LOCKF_ENABLED_IRQS_READ \
57 (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ)
58#define LOCKF_USED_IN_IRQ_READ \
59 (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
60 27
61#define MAX_LOCKDEP_SUBCLASSES 8UL 28#define MAX_LOCKDEP_SUBCLASSES 8UL
62 29
@@ -97,7 +64,7 @@ struct lock_class {
97 * IRQ/softirq usage tracking bits: 64 * IRQ/softirq usage tracking bits:
98 */ 65 */
99 unsigned long usage_mask; 66 unsigned long usage_mask;
100 struct stack_trace usage_traces[LOCK_USAGE_STATES]; 67 struct stack_trace usage_traces[XXX_LOCK_USAGE_STATES];
101 68
102 /* 69 /*
103 * These fields represent a directed graph of lock dependencies, 70 * These fields represent a directed graph of lock dependencies,
@@ -324,7 +291,11 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
324 lock_set_class(lock, lock->name, lock->key, subclass, ip); 291 lock_set_class(lock, lock->name, lock->key, subclass, ip);
325} 292}
326 293
327# define INIT_LOCKDEP .lockdep_recursion = 0, 294extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
295extern void lockdep_clear_current_reclaim_state(void);
296extern void lockdep_trace_alloc(gfp_t mask);
297
298# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
328 299
329#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) 300#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
330 301
@@ -342,6 +313,9 @@ static inline void lockdep_on(void)
342# define lock_release(l, n, i) do { } while (0) 313# define lock_release(l, n, i) do { } while (0)
343# define lock_set_class(l, n, k, s, i) do { } while (0) 314# define lock_set_class(l, n, k, s, i) do { } while (0)
344# define lock_set_subclass(l, s, i) do { } while (0) 315# define lock_set_subclass(l, s, i) do { } while (0)
316# define lockdep_set_current_reclaim_state(g) do { } while (0)
317# define lockdep_clear_current_reclaim_state() do { } while (0)
318# define lockdep_trace_alloc(g) do { } while (0)
345# define lockdep_init() do { } while (0) 319# define lockdep_init() do { } while (0)
346# define lockdep_info() do { } while (0) 320# define lockdep_info() do { } while (0)
347# define lockdep_init_map(lock, name, key, sub) \ 321# define lockdep_init_map(lock, name, key, sub) \
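The reclaim hooks declared above are meant to be called from the allocator and reclaim paths (mm/page_alloc.c and mm/vmscan.c appear in the diffstat). Below is a hedged sketch of how a caller might use them; my_alloc() and its reclaim step are hypothetical placeholders, not code from this patch.

#include <linux/lockdep.h>
#include <linux/gfp.h>

/* Hypothetical allocation path illustrating the new annotations. */
static void *my_alloc(gfp_t gfp_mask)
{
	/*
	 * Mark every currently-held lock as held over a potential
	 * __GFP_FS allocation; lockdep_trace_alloc() itself ignores
	 * non-blocking and non-__GFP_FS requests.
	 */
	lockdep_trace_alloc(gfp_mask);

	/* ... fast path failed, fall back to direct reclaim ... */
	lockdep_set_current_reclaim_state(gfp_mask);
	/* shrink caches here; locks taken now are marked USED_IN_RECLAIM_FS */
	lockdep_clear_current_reclaim_state();

	return NULL; /* placeholder */
}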
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 7a0e5c4f807..3069ec7e0ab 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -50,8 +50,10 @@ struct mutex {
50 atomic_t count; 50 atomic_t count;
51 spinlock_t wait_lock; 51 spinlock_t wait_lock;
52 struct list_head wait_list; 52 struct list_head wait_list;
53#ifdef CONFIG_DEBUG_MUTEXES 53#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
54 struct thread_info *owner; 54 struct thread_info *owner;
55#endif
56#ifdef CONFIG_DEBUG_MUTEXES
55 const char *name; 57 const char *name;
56 void *magic; 58 void *magic;
57#endif 59#endif
@@ -68,7 +70,6 @@ struct mutex_waiter {
68 struct list_head list; 70 struct list_head list;
69 struct task_struct *task; 71 struct task_struct *task;
70#ifdef CONFIG_DEBUG_MUTEXES 72#ifdef CONFIG_DEBUG_MUTEXES
71 struct mutex *lock;
72 void *magic; 73 void *magic;
73#endif 74#endif
74}; 75};
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index f5e793d69bd..79fcbc4b09d 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -121,6 +121,9 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
121u64 ring_buffer_time_stamp(int cpu); 121u64 ring_buffer_time_stamp(int cpu);
122void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); 122void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
123 123
124size_t ring_buffer_page_len(void *page);
125
126
124/* 127/*
125 * The below functions are fine to use outside the tracing facility. 128 * The below functions are fine to use outside the tracing facility.
126 */ 129 */
@@ -138,8 +141,8 @@ static inline int tracing_is_on(void) { return 0; }
138 141
139void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); 142void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
140void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); 143void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
141int ring_buffer_read_page(struct ring_buffer *buffer, 144int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
142 void **data_page, int cpu, int full); 145 size_t len, int cpu, int full);
143 146
144enum ring_buffer_flags { 147enum ring_buffer_flags {
145 RB_FL_OVERWRITE = 1 << 0, 148 RB_FL_OVERWRITE = 1 << 0,
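ring_buffer_read_page() now takes an explicit length. A hedged sketch of a consumer follows, assuming the usual alloc/read/free pattern and that a negative return means no data was copied; drain_one_page() is an illustrative name.

#include <linux/ring_buffer.h>
#include <linux/mm.h>	/* PAGE_SIZE */

/* Hypothetical per-cpu drain using the updated read-page API. */
static void drain_one_page(struct ring_buffer *buffer, int cpu)
{
	void *page = ring_buffer_alloc_read_page(buffer);

	if (!page)
		return;

	/* New in this series: tell the ring buffer how much we can take. */
	if (ring_buffer_read_page(buffer, &page, PAGE_SIZE, cpu, 0) >= 0) {
		/* consume the event data now sitting in 'page' */
	}

	ring_buffer_free_read_page(buffer, page);
}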
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 698ce978e97..0237d124089 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -333,7 +333,9 @@ extern signed long schedule_timeout(signed long timeout);
333extern signed long schedule_timeout_interruptible(signed long timeout); 333extern signed long schedule_timeout_interruptible(signed long timeout);
334extern signed long schedule_timeout_killable(signed long timeout); 334extern signed long schedule_timeout_killable(signed long timeout);
335extern signed long schedule_timeout_uninterruptible(signed long timeout); 335extern signed long schedule_timeout_uninterruptible(signed long timeout);
336asmlinkage void __schedule(void);
336asmlinkage void schedule(void); 337asmlinkage void schedule(void);
338extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
337 339
338struct nsproxy; 340struct nsproxy;
339struct user_namespace; 341struct user_namespace;
@@ -1330,6 +1332,7 @@ struct task_struct {
1330 int lockdep_depth; 1332 int lockdep_depth;
1331 unsigned int lockdep_recursion; 1333 unsigned int lockdep_recursion;
1332 struct held_lock held_locks[MAX_LOCK_DEPTH]; 1334 struct held_lock held_locks[MAX_LOCK_DEPTH];
1335 gfp_t lockdep_reclaim_gfp;
1333#endif 1336#endif
1334 1337
1335/* journalling filesystem info */ 1338/* journalling filesystem info */
diff --git a/include/linux/timer.h b/include/linux/timer.h
index daf9685b861..51774eb87cc 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -5,6 +5,7 @@
5#include <linux/ktime.h> 5#include <linux/ktime.h>
6#include <linux/stddef.h> 6#include <linux/stddef.h>
7#include <linux/debugobjects.h> 7#include <linux/debugobjects.h>
8#include <linux/stringify.h>
8 9
9struct tvec_base; 10struct tvec_base;
10 11
@@ -21,52 +22,126 @@ struct timer_list {
21 char start_comm[16]; 22 char start_comm[16];
22 int start_pid; 23 int start_pid;
23#endif 24#endif
25#ifdef CONFIG_LOCKDEP
26 struct lockdep_map lockdep_map;
27#endif
24}; 28};
25 29
26extern struct tvec_base boot_tvec_bases; 30extern struct tvec_base boot_tvec_bases;
27 31
32#ifdef CONFIG_LOCKDEP
33/*
34 * NB: because we have to copy the lockdep_map, setting the lockdep_map key
35 * (second argument) here is required, otherwise it could be initialised to
36 * the copy of the lockdep_map later! We use the pointer to and the string
37 * "<file>:<line>" as the key resp. the name of the lockdep_map.
38 */
39#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) \
40 .lockdep_map = STATIC_LOCKDEP_MAP_INIT(_kn, &_kn),
41#else
42#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
43#endif
44
28#define TIMER_INITIALIZER(_function, _expires, _data) { \ 45#define TIMER_INITIALIZER(_function, _expires, _data) { \
29 .entry = { .prev = TIMER_ENTRY_STATIC }, \ 46 .entry = { .prev = TIMER_ENTRY_STATIC }, \
30 .function = (_function), \ 47 .function = (_function), \
31 .expires = (_expires), \ 48 .expires = (_expires), \
32 .data = (_data), \ 49 .data = (_data), \
33 .base = &boot_tvec_bases, \ 50 .base = &boot_tvec_bases, \
51 __TIMER_LOCKDEP_MAP_INITIALIZER( \
52 __FILE__ ":" __stringify(__LINE__)) \
34 } 53 }
35 54
36#define DEFINE_TIMER(_name, _function, _expires, _data) \ 55#define DEFINE_TIMER(_name, _function, _expires, _data) \
37 struct timer_list _name = \ 56 struct timer_list _name = \
38 TIMER_INITIALIZER(_function, _expires, _data) 57 TIMER_INITIALIZER(_function, _expires, _data)
39 58
40void init_timer(struct timer_list *timer); 59void init_timer_key(struct timer_list *timer,
41void init_timer_deferrable(struct timer_list *timer); 60 const char *name,
61 struct lock_class_key *key);
62void init_timer_deferrable_key(struct timer_list *timer,
63 const char *name,
64 struct lock_class_key *key);
65
66#ifdef CONFIG_LOCKDEP
67#define init_timer(timer) \
68 do { \
69 static struct lock_class_key __key; \
70 init_timer_key((timer), #timer, &__key); \
71 } while (0)
72
73#define init_timer_deferrable(timer) \
74 do { \
75 static struct lock_class_key __key; \
76 init_timer_deferrable_key((timer), #timer, &__key); \
77 } while (0)
78
79#define init_timer_on_stack(timer) \
80 do { \
81 static struct lock_class_key __key; \
82 init_timer_on_stack_key((timer), #timer, &__key); \
83 } while (0)
84
85#define setup_timer(timer, fn, data) \
86 do { \
87 static struct lock_class_key __key; \
88 setup_timer_key((timer), #timer, &__key, (fn), (data));\
89 } while (0)
90
91#define setup_timer_on_stack(timer, fn, data) \
92 do { \
93 static struct lock_class_key __key; \
94 setup_timer_on_stack_key((timer), #timer, &__key, \
95 (fn), (data)); \
96 } while (0)
97#else
98#define init_timer(timer)\
99 init_timer_key((timer), NULL, NULL)
100#define init_timer_deferrable(timer)\
101 init_timer_deferrable_key((timer), NULL, NULL)
102#define init_timer_on_stack(timer)\
103 init_timer_on_stack_key((timer), NULL, NULL)
104#define setup_timer(timer, fn, data)\
105 setup_timer_key((timer), NULL, NULL, (fn), (data))
106#define setup_timer_on_stack(timer, fn, data)\
107 setup_timer_on_stack_key((timer), NULL, NULL, (fn), (data))
108#endif
42 109
43#ifdef CONFIG_DEBUG_OBJECTS_TIMERS 110#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
44extern void init_timer_on_stack(struct timer_list *timer); 111extern void init_timer_on_stack_key(struct timer_list *timer,
112 const char *name,
113 struct lock_class_key *key);
45extern void destroy_timer_on_stack(struct timer_list *timer); 114extern void destroy_timer_on_stack(struct timer_list *timer);
46#else 115#else
47static inline void destroy_timer_on_stack(struct timer_list *timer) { } 116static inline void destroy_timer_on_stack(struct timer_list *timer) { }
48static inline void init_timer_on_stack(struct timer_list *timer) 117static inline void init_timer_on_stack_key(struct timer_list *timer,
118 const char *name,
119 struct lock_class_key *key)
49{ 120{
50 init_timer(timer); 121 init_timer_key(timer, name, key);
51} 122}
52#endif 123#endif
53 124
54static inline void setup_timer(struct timer_list * timer, 125static inline void setup_timer_key(struct timer_list * timer,
126 const char *name,
127 struct lock_class_key *key,
55 void (*function)(unsigned long), 128 void (*function)(unsigned long),
56 unsigned long data) 129 unsigned long data)
57{ 130{
58 timer->function = function; 131 timer->function = function;
59 timer->data = data; 132 timer->data = data;
60 init_timer(timer); 133 init_timer_key(timer, name, key);
61} 134}
62 135
63static inline void setup_timer_on_stack(struct timer_list *timer, 136static inline void setup_timer_on_stack_key(struct timer_list *timer,
137 const char *name,
138 struct lock_class_key *key,
64 void (*function)(unsigned long), 139 void (*function)(unsigned long),
65 unsigned long data) 140 unsigned long data)
66{ 141{
67 timer->function = function; 142 timer->function = function;
68 timer->data = data; 143 timer->data = data;
69 init_timer_on_stack(timer); 144 init_timer_on_stack_key(timer, name, key);
70} 145}
71 146
72/** 147/**
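With the above, init_timer() and friends become call-site macros under CONFIG_LOCKDEP, each supplying a static lock_class_key and the stringified argument as the class name, while DEFINE_TIMER keys its lockdep_map on "<file>:<line>". A brief hedged usage sketch; my_timer_fn and my_dev_start are illustrative names only.

#include <linux/timer.h>
#include <linux/jiffies.h>

static void my_timer_fn(unsigned long data)
{
	/* runs in softirq context when the timer expires */
}

/* Static timer: its lockdep_map is keyed and named "<file>:<line>". */
static DEFINE_TIMER(my_static_timer, my_timer_fn, 0, 0);

static struct timer_list my_timer;

static void my_dev_start(void)
{
	/*
	 * Under CONFIG_LOCKDEP this expands to setup_timer_key() with a
	 * static lock_class_key unique to this call site and the name
	 * "&my_timer"; without lockdep the key and name are simply NULL.
	 */
	setup_timer(&my_timer, my_timer_fn, 0);
	mod_timer(&my_timer, jiffies + HZ);
	mod_timer(&my_static_timer, jiffies + 2 * HZ);
}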
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
new file mode 100644
index 00000000000..5ca67df87f2
--- /dev/null
+++ b/include/trace/lockdep.h
@@ -0,0 +1,9 @@
+#ifndef _TRACE_LOCKDEP_H
+#define _TRACE_LOCKDEP_H
+
+#include <linux/lockdep.h>
+#include <linux/tracepoint.h>
+
+#include <trace/lockdep_event_types.h>
+
+#endif
diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
new file mode 100644
index 00000000000..f713d74a82b
--- /dev/null
+++ b/include/trace/lockdep_event_types.h
@@ -0,0 +1,44 @@
+
+#ifndef TRACE_EVENT_FORMAT
+# error Do not include this file directly.
+# error Unless you know what you are doing.
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lock
+
+#ifdef CONFIG_LOCKDEP
+
+TRACE_FORMAT(lock_acquire,
+	TPPROTO(struct lockdep_map *lock, unsigned int subclass,
+		int trylock, int read, int check,
+		struct lockdep_map *next_lock, unsigned long ip),
+	TPARGS(lock, subclass, trylock, read, check, next_lock, ip),
+	TPFMT("%s%s%s", trylock ? "try " : "",
+		read ? "read " : "", lock->name)
+	);
+
+TRACE_FORMAT(lock_release,
+	TPPROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+	TPARGS(lock, nested, ip),
+	TPFMT("%s", lock->name)
+	);
+
+#ifdef CONFIG_LOCK_STAT
+
+TRACE_FORMAT(lock_contended,
+	TPPROTO(struct lockdep_map *lock, unsigned long ip),
+	TPARGS(lock, ip),
+	TPFMT("%s", lock->name)
+	);
+
+TRACE_FORMAT(lock_acquired,
+	TPPROTO(struct lockdep_map *lock, unsigned long ip),
+	TPARGS(lock, ip),
+	TPFMT("%s", lock->name)
+	);
+
+#endif
+#endif
+
+#undef TRACE_SYSTEM
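These lock events are ordinary tracepoints (kernel/lockdep.c adds the matching DEFINE_TRACE() calls below), so a probe can be attached through the generated register_trace_<name>() helpers. The sketch below assumes the tracepoint API of this era, without the later private-data argument; treat it as illustrative rather than as part of the patch.

#include <linux/module.h>
#include <trace/lockdep.h>

/* Probe signature must match the TPPROTO of lock_release above. */
static void probe_lock_release(struct lockdep_map *lock, int nested,
			       unsigned long ip)
{
	/* keep probes lightweight; this runs on every lock_release() */
}

static int __init lock_probe_init(void)
{
	return register_trace_lock_release(probe_lock_release);
}

static void __exit lock_probe_exit(void)
{
	unregister_trace_lock_release(probe_lock_release);
}

module_init(lock_probe_init);
module_exit(lock_probe_exit);
MODULE_LICENSE("GPL");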
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
index 33c8ed5ccb6..df56f5694be 100644
--- a/include/trace/trace_event_types.h
+++ b/include/trace/trace_event_types.h
@@ -2,3 +2,4 @@
 
 #include <trace/sched_event_types.h>
 #include <trace/irq_event_types.h>
+#include <trace/lockdep_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index ea2ef205176..fd13750ca4b 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -2,3 +2,4 @@
 
 #include <trace/sched.h>
 #include <trace/irq.h>
+#include <trace/lockdep.h>
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06b0c3568f0..cb70c1db85d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -41,6 +41,8 @@
41#include <linux/utsname.h> 41#include <linux/utsname.h>
42#include <linux/hash.h> 42#include <linux/hash.h>
43#include <linux/ftrace.h> 43#include <linux/ftrace.h>
44#include <linux/stringify.h>
45#include <trace/lockdep.h>
44 46
45#include <asm/sections.h> 47#include <asm/sections.h>
46 48
@@ -310,12 +312,14 @@ EXPORT_SYMBOL(lockdep_on);
310#if VERBOSE 312#if VERBOSE
311# define HARDIRQ_VERBOSE 1 313# define HARDIRQ_VERBOSE 1
312# define SOFTIRQ_VERBOSE 1 314# define SOFTIRQ_VERBOSE 1
315# define RECLAIM_VERBOSE 1
313#else 316#else
314# define HARDIRQ_VERBOSE 0 317# define HARDIRQ_VERBOSE 0
315# define SOFTIRQ_VERBOSE 0 318# define SOFTIRQ_VERBOSE 0
319# define RECLAIM_VERBOSE 0
316#endif 320#endif
317 321
318#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE 322#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE
319/* 323/*
320 * Quick filtering for interesting events: 324 * Quick filtering for interesting events:
321 */ 325 */
@@ -443,17 +447,18 @@ atomic_t nr_find_usage_backwards_recursions;
443 * Locking printouts: 447 * Locking printouts:
444 */ 448 */
445 449
450#define __USAGE(__STATE) \
451 [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W", \
452 [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \
453 [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\
454 [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R",
455
446static const char *usage_str[] = 456static const char *usage_str[] =
447{ 457{
448 [LOCK_USED] = "initial-use ", 458#define LOCKDEP_STATE(__STATE) __USAGE(__STATE)
449 [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W", 459#include "lockdep_states.h"
450 [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W", 460#undef LOCKDEP_STATE
451 [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W", 461 [LOCK_USED] = "INITIAL USE",
452 [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W",
453 [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R",
454 [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R",
455 [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R",
456 [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R",
457}; 462};
458 463
459const char * __get_key_name(struct lockdep_subclass_key *key, char *str) 464const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
@@ -461,46 +466,45 @@ const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
461 return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); 466 return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str);
462} 467}
463 468
464void 469static inline unsigned long lock_flag(enum lock_usage_bit bit)
465get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4)
466{ 470{
467 *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.'; 471 return 1UL << bit;
468 472}
469 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
470 *c1 = '+';
471 else
472 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
473 *c1 = '-';
474 473
475 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) 474static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
476 *c2 = '+'; 475{
477 else 476 char c = '.';
478 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
479 *c2 = '-';
480 477
481 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 478 if (class->usage_mask & lock_flag(bit + 2))
482 *c3 = '-'; 479 c = '+';
483 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) { 480 if (class->usage_mask & lock_flag(bit)) {
484 *c3 = '+'; 481 c = '-';
485 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 482 if (class->usage_mask & lock_flag(bit + 2))
486 *c3 = '?'; 483 c = '?';
487 } 484 }
488 485
489 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) 486 return c;
490 *c4 = '-'; 487}
491 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) { 488
492 *c4 = '+'; 489void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
493 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) 490{
494 *c4 = '?'; 491 int i = 0;
495 } 492
493#define LOCKDEP_STATE(__STATE) \
494 usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \
495 usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ);
496#include "lockdep_states.h"
497#undef LOCKDEP_STATE
498
499 usage[i] = '\0';
496} 500}
497 501
498static void print_lock_name(struct lock_class *class) 502static void print_lock_name(struct lock_class *class)
499{ 503{
500 char str[KSYM_NAME_LEN], c1, c2, c3, c4; 504 char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
501 const char *name; 505 const char *name;
502 506
503 get_usage_chars(class, &c1, &c2, &c3, &c4); 507 get_usage_chars(class, usage);
504 508
505 name = class->name; 509 name = class->name;
506 if (!name) { 510 if (!name) {
@@ -513,7 +517,7 @@ static void print_lock_name(struct lock_class *class)
513 if (class->subclass) 517 if (class->subclass)
514 printk("/%d", class->subclass); 518 printk("/%d", class->subclass);
515 } 519 }
516 printk("){%c%c%c%c}", c1, c2, c3, c4); 520 printk("){%s}", usage);
517} 521}
518 522
519static void print_lockdep_cache(struct lockdep_map *lock) 523static void print_lockdep_cache(struct lockdep_map *lock)
@@ -1263,9 +1267,49 @@ check_usage(struct task_struct *curr, struct held_lock *prev,
1263 bit_backwards, bit_forwards, irqclass); 1267 bit_backwards, bit_forwards, irqclass);
1264} 1268}
1265 1269
1266static int 1270static const char *state_names[] = {
1267check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, 1271#define LOCKDEP_STATE(__STATE) \
1268 struct held_lock *next) 1272 __stringify(__STATE),
1273#include "lockdep_states.h"
1274#undef LOCKDEP_STATE
1275};
1276
1277static const char *state_rnames[] = {
1278#define LOCKDEP_STATE(__STATE) \
1279 __stringify(__STATE)"-READ",
1280#include "lockdep_states.h"
1281#undef LOCKDEP_STATE
1282};
1283
1284static inline const char *state_name(enum lock_usage_bit bit)
1285{
1286 return (bit & 1) ? state_rnames[bit >> 2] : state_names[bit >> 2];
1287}
1288
1289static int exclusive_bit(int new_bit)
1290{
1291 /*
1292 * USED_IN
1293 * USED_IN_READ
1294 * ENABLED
1295 * ENABLED_READ
1296 *
1297 * bit 0 - write/read
1298 * bit 1 - used_in/enabled
1299 * bit 2+ state
1300 */
1301
1302 int state = new_bit & ~3;
1303 int dir = new_bit & 2;
1304
1305 /*
1306 * keep state, bit flip the direction and strip read.
1307 */
1308 return state | (dir ^ 2);
1309}
1310
1311static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
1312 struct held_lock *next, enum lock_usage_bit bit)
1269{ 1313{
1270 /* 1314 /*
1271 * Prove that the new dependency does not connect a hardirq-safe 1315 * Prove that the new dependency does not connect a hardirq-safe
@@ -1273,38 +1317,34 @@ check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
1273 * the backwards-subgraph starting at <prev>, and the 1317 * the backwards-subgraph starting at <prev>, and the
1274 * forwards-subgraph starting at <next>: 1318 * forwards-subgraph starting at <next>:
1275 */ 1319 */
1276 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, 1320 if (!check_usage(curr, prev, next, bit,
1277 LOCK_ENABLED_HARDIRQS, "hard")) 1321 exclusive_bit(bit), state_name(bit)))
1278 return 0; 1322 return 0;
1279 1323
1324 bit++; /* _READ */
1325
1280 /* 1326 /*
1281 * Prove that the new dependency does not connect a hardirq-safe-read 1327 * Prove that the new dependency does not connect a hardirq-safe-read
1282 * lock with a hardirq-unsafe lock - to achieve this we search 1328 * lock with a hardirq-unsafe lock - to achieve this we search
1283 * the backwards-subgraph starting at <prev>, and the 1329 * the backwards-subgraph starting at <prev>, and the
1284 * forwards-subgraph starting at <next>: 1330 * forwards-subgraph starting at <next>:
1285 */ 1331 */
1286 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, 1332 if (!check_usage(curr, prev, next, bit,
1287 LOCK_ENABLED_HARDIRQS, "hard-read")) 1333 exclusive_bit(bit), state_name(bit)))
1288 return 0; 1334 return 0;
1289 1335
1290 /* 1336 return 1;
1291 * Prove that the new dependency does not connect a softirq-safe 1337}
1292 * lock with a softirq-unsafe lock - to achieve this we search 1338
1293 * the backwards-subgraph starting at <prev>, and the 1339static int
1294 * forwards-subgraph starting at <next>: 1340check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
1295 */ 1341 struct held_lock *next)
1296 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, 1342{
1297 LOCK_ENABLED_SOFTIRQS, "soft")) 1343#define LOCKDEP_STATE(__STATE) \
1298 return 0; 1344 if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \
1299 /*
1300 * Prove that the new dependency does not connect a softirq-safe-read
1301 * lock with a softirq-unsafe lock - to achieve this we search
1302 * the backwards-subgraph starting at <prev>, and the
1303 * forwards-subgraph starting at <next>:
1304 */
1305 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
1306 LOCK_ENABLED_SOFTIRQS, "soft"))
1307 return 0; 1345 return 0;
1346#include "lockdep_states.h"
1347#undef LOCKDEP_STATE
1308 1348
1309 return 1; 1349 return 1;
1310} 1350}
@@ -1861,9 +1901,9 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
1861 curr->comm, task_pid_nr(curr)); 1901 curr->comm, task_pid_nr(curr));
1862 print_lock(this); 1902 print_lock(this);
1863 if (forwards) 1903 if (forwards)
1864 printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass); 1904 printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
1865 else 1905 else
1866 printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass); 1906 printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
1867 print_lock_name(other); 1907 print_lock_name(other);
1868 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); 1908 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
1869 1909
@@ -1933,7 +1973,7 @@ void print_irqtrace_events(struct task_struct *curr)
1933 print_ip_sym(curr->softirq_disable_ip); 1973 print_ip_sym(curr->softirq_disable_ip);
1934} 1974}
1935 1975
1936static int hardirq_verbose(struct lock_class *class) 1976static int HARDIRQ_verbose(struct lock_class *class)
1937{ 1977{
1938#if HARDIRQ_VERBOSE 1978#if HARDIRQ_VERBOSE
1939 return class_filter(class); 1979 return class_filter(class);
@@ -1941,7 +1981,7 @@ static int hardirq_verbose(struct lock_class *class)
1941 return 0; 1981 return 0;
1942} 1982}
1943 1983
1944static int softirq_verbose(struct lock_class *class) 1984static int SOFTIRQ_verbose(struct lock_class *class)
1945{ 1985{
1946#if SOFTIRQ_VERBOSE 1986#if SOFTIRQ_VERBOSE
1947 return class_filter(class); 1987 return class_filter(class);
@@ -1949,185 +1989,95 @@ static int softirq_verbose(struct lock_class *class)
1949 return 0; 1989 return 0;
1950} 1990}
1951 1991
1992static int RECLAIM_FS_verbose(struct lock_class *class)
1993{
1994#if RECLAIM_VERBOSE
1995 return class_filter(class);
1996#endif
1997 return 0;
1998}
1999
1952#define STRICT_READ_CHECKS 1 2000#define STRICT_READ_CHECKS 1
1953 2001
1954static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, 2002static int (*state_verbose_f[])(struct lock_class *class) = {
2003#define LOCKDEP_STATE(__STATE) \
2004 __STATE##_verbose,
2005#include "lockdep_states.h"
2006#undef LOCKDEP_STATE
2007};
2008
2009static inline int state_verbose(enum lock_usage_bit bit,
2010 struct lock_class *class)
2011{
2012 return state_verbose_f[bit >> 2](class);
2013}
2014
2015typedef int (*check_usage_f)(struct task_struct *, struct held_lock *,
2016 enum lock_usage_bit bit, const char *name);
2017
2018static int
2019mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1955 enum lock_usage_bit new_bit) 2020 enum lock_usage_bit new_bit)
1956{ 2021{
1957 int ret = 1; 2022 int excl_bit = exclusive_bit(new_bit);
2023 int read = new_bit & 1;
2024 int dir = new_bit & 2;
1958 2025
1959 switch(new_bit) { 2026 /*
1960 case LOCK_USED_IN_HARDIRQ: 2027 * mark USED_IN has to look forwards -- to ensure no dependency
1961 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) 2028 * has ENABLED state, which would allow recursion deadlocks.
1962 return 0; 2029 *
1963 if (!valid_state(curr, this, new_bit, 2030 * mark ENABLED has to look backwards -- to ensure no dependee
1964 LOCK_ENABLED_HARDIRQS_READ)) 2031 * has USED_IN state, which, again, would allow recursion deadlocks.
1965 return 0; 2032 */
1966 /* 2033 check_usage_f usage = dir ?
1967 * just marked it hardirq-safe, check that this lock 2034 check_usage_backwards : check_usage_forwards;
1968 * took no hardirq-unsafe lock in the past: 2035
1969 */ 2036 /*
1970 if (!check_usage_forwards(curr, this, 2037 * Validate that this particular lock does not have conflicting
1971 LOCK_ENABLED_HARDIRQS, "hard")) 2038 * usage states.
1972 return 0; 2039 */
1973#if STRICT_READ_CHECKS 2040 if (!valid_state(curr, this, new_bit, excl_bit))
1974 /* 2041 return 0;
1975 * just marked it hardirq-safe, check that this lock 2042
1976 * took no hardirq-unsafe-read lock in the past: 2043 /*
1977 */ 2044 * Validate that the lock dependencies don't have conflicting usage
1978 if (!check_usage_forwards(curr, this, 2045 * states.
1979 LOCK_ENABLED_HARDIRQS_READ, "hard-read")) 2046 */
1980 return 0; 2047 if ((!read || !dir || STRICT_READ_CHECKS) &&
1981#endif 2048 !usage(curr, this, excl_bit, state_name(new_bit & ~1)))
1982 if (hardirq_verbose(hlock_class(this))) 2049 return 0;
1983 ret = 2; 2050
1984 break; 2051 /*
1985 case LOCK_USED_IN_SOFTIRQ: 2052 * Check for read in write conflicts
1986 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) 2053 */
1987 return 0; 2054 if (!read) {
1988 if (!valid_state(curr, this, new_bit, 2055 if (!valid_state(curr, this, new_bit, excl_bit + 1))
1989 LOCK_ENABLED_SOFTIRQS_READ))
1990 return 0;
1991 /*
1992 * just marked it softirq-safe, check that this lock
1993 * took no softirq-unsafe lock in the past:
1994 */
1995 if (!check_usage_forwards(curr, this,
1996 LOCK_ENABLED_SOFTIRQS, "soft"))
1997 return 0;
1998#if STRICT_READ_CHECKS
1999 /*
2000 * just marked it softirq-safe, check that this lock
2001 * took no softirq-unsafe-read lock in the past:
2002 */
2003 if (!check_usage_forwards(curr, this,
2004 LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
2005 return 0;
2006#endif
2007 if (softirq_verbose(hlock_class(this)))
2008 ret = 2;
2009 break;
2010 case LOCK_USED_IN_HARDIRQ_READ:
2011 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
2012 return 0;
2013 /*
2014 * just marked it hardirq-read-safe, check that this lock
2015 * took no hardirq-unsafe lock in the past:
2016 */
2017 if (!check_usage_forwards(curr, this,
2018 LOCK_ENABLED_HARDIRQS, "hard"))
2019 return 0;
2020 if (hardirq_verbose(hlock_class(this)))
2021 ret = 2;
2022 break;
2023 case LOCK_USED_IN_SOFTIRQ_READ:
2024 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
2025 return 0;
2026 /*
2027 * just marked it softirq-read-safe, check that this lock
2028 * took no softirq-unsafe lock in the past:
2029 */
2030 if (!check_usage_forwards(curr, this,
2031 LOCK_ENABLED_SOFTIRQS, "soft"))
2032 return 0;
2033 if (softirq_verbose(hlock_class(this)))
2034 ret = 2;
2035 break;
2036 case LOCK_ENABLED_HARDIRQS:
2037 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
2038 return 0;
2039 if (!valid_state(curr, this, new_bit,
2040 LOCK_USED_IN_HARDIRQ_READ))
2041 return 0;
2042 /*
2043 * just marked it hardirq-unsafe, check that no hardirq-safe
2044 * lock in the system ever took it in the past:
2045 */
2046 if (!check_usage_backwards(curr, this,
2047 LOCK_USED_IN_HARDIRQ, "hard"))
2048 return 0;
2049#if STRICT_READ_CHECKS
2050 /*
2051 * just marked it hardirq-unsafe, check that no
2052 * hardirq-safe-read lock in the system ever took
2053 * it in the past:
2054 */
2055 if (!check_usage_backwards(curr, this,
2056 LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
2057 return 0;
2058#endif
2059 if (hardirq_verbose(hlock_class(this)))
2060 ret = 2;
2061 break;
2062 case LOCK_ENABLED_SOFTIRQS:
2063 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
2064 return 0;
2065 if (!valid_state(curr, this, new_bit,
2066 LOCK_USED_IN_SOFTIRQ_READ))
2067 return 0;
2068 /*
2069 * just marked it softirq-unsafe, check that no softirq-safe
2070 * lock in the system ever took it in the past:
2071 */
2072 if (!check_usage_backwards(curr, this,
2073 LOCK_USED_IN_SOFTIRQ, "soft"))
2074 return 0;
2075#if STRICT_READ_CHECKS
2076 /*
2077 * just marked it softirq-unsafe, check that no
2078 * softirq-safe-read lock in the system ever took
2079 * it in the past:
2080 */
2081 if (!check_usage_backwards(curr, this,
2082 LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
2083 return 0;
2084#endif
2085 if (softirq_verbose(hlock_class(this)))
2086 ret = 2;
2087 break;
2088 case LOCK_ENABLED_HARDIRQS_READ:
2089 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
2090 return 0;
2091#if STRICT_READ_CHECKS
2092 /*
2093 * just marked it hardirq-read-unsafe, check that no
2094 * hardirq-safe lock in the system ever took it in the past:
2095 */
2096 if (!check_usage_backwards(curr, this,
2097 LOCK_USED_IN_HARDIRQ, "hard"))
2098 return 0;
2099#endif
2100 if (hardirq_verbose(hlock_class(this)))
2101 ret = 2;
2102 break;
2103 case LOCK_ENABLED_SOFTIRQS_READ:
2104 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
2105 return 0; 2056 return 0;
2106#if STRICT_READ_CHECKS 2057
2107 /* 2058 if (STRICT_READ_CHECKS &&
2108 * just marked it softirq-read-unsafe, check that no 2059 !usage(curr, this, excl_bit + 1,
2109 * softirq-safe lock in the system ever took it in the past: 2060 state_name(new_bit + 1)))
2110 */
2111 if (!check_usage_backwards(curr, this,
2112 LOCK_USED_IN_SOFTIRQ, "soft"))
2113 return 0; 2061 return 0;
2114#endif
2115 if (softirq_verbose(hlock_class(this)))
2116 ret = 2;
2117 break;
2118 default:
2119 WARN_ON(1);
2120 break;
2121 } 2062 }
2122 2063
2123 return ret; 2064 if (state_verbose(new_bit, hlock_class(this)))
2065 return 2;
2066
2067 return 1;
2124} 2068}
2125 2069
2070enum mark_type {
2071#define LOCKDEP_STATE(__STATE) __STATE,
2072#include "lockdep_states.h"
2073#undef LOCKDEP_STATE
2074};
2075
2126/* 2076/*
2127 * Mark all held locks with a usage bit: 2077 * Mark all held locks with a usage bit:
2128 */ 2078 */
2129static int 2079static int
2130mark_held_locks(struct task_struct *curr, int hardirq) 2080mark_held_locks(struct task_struct *curr, enum mark_type mark)
2131{ 2081{
2132 enum lock_usage_bit usage_bit; 2082 enum lock_usage_bit usage_bit;
2133 struct held_lock *hlock; 2083 struct held_lock *hlock;
@@ -2136,17 +2086,12 @@ mark_held_locks(struct task_struct *curr, int hardirq)
2136 for (i = 0; i < curr->lockdep_depth; i++) { 2086 for (i = 0; i < curr->lockdep_depth; i++) {
2137 hlock = curr->held_locks + i; 2087 hlock = curr->held_locks + i;
2138 2088
2139 if (hardirq) { 2089 usage_bit = 2 + (mark << 2); /* ENABLED */
2140 if (hlock->read) 2090 if (hlock->read)
2141 usage_bit = LOCK_ENABLED_HARDIRQS_READ; 2091 usage_bit += 1; /* READ */
2142 else 2092
2143 usage_bit = LOCK_ENABLED_HARDIRQS; 2093 BUG_ON(usage_bit >= LOCK_USAGE_STATES);
2144 } else { 2094
2145 if (hlock->read)
2146 usage_bit = LOCK_ENABLED_SOFTIRQS_READ;
2147 else
2148 usage_bit = LOCK_ENABLED_SOFTIRQS;
2149 }
2150 if (!mark_lock(curr, hlock, usage_bit)) 2095 if (!mark_lock(curr, hlock, usage_bit))
2151 return 0; 2096 return 0;
2152 } 2097 }
@@ -2200,7 +2145,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
2200 * We are going to turn hardirqs on, so set the 2145 * We are going to turn hardirqs on, so set the
2201 * usage bit for all held locks: 2146 * usage bit for all held locks:
2202 */ 2147 */
2203 if (!mark_held_locks(curr, 1)) 2148 if (!mark_held_locks(curr, HARDIRQ))
2204 return; 2149 return;
2205 /* 2150 /*
2206 * If we have softirqs enabled, then set the usage 2151 * If we have softirqs enabled, then set the usage
@@ -2208,7 +2153,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
2208 * this bit from being set before) 2153 * this bit from being set before)
2209 */ 2154 */
2210 if (curr->softirqs_enabled) 2155 if (curr->softirqs_enabled)
2211 if (!mark_held_locks(curr, 0)) 2156 if (!mark_held_locks(curr, SOFTIRQ))
2212 return; 2157 return;
2213 2158
2214 curr->hardirq_enable_ip = ip; 2159 curr->hardirq_enable_ip = ip;
@@ -2288,7 +2233,7 @@ void trace_softirqs_on(unsigned long ip)
2288 * enabled too: 2233 * enabled too:
2289 */ 2234 */
2290 if (curr->hardirqs_enabled) 2235 if (curr->hardirqs_enabled)
2291 mark_held_locks(curr, 0); 2236 mark_held_locks(curr, SOFTIRQ);
2292} 2237}
2293 2238
2294/* 2239/*
@@ -2317,6 +2262,31 @@ void trace_softirqs_off(unsigned long ip)
2317 debug_atomic_inc(&redundant_softirqs_off); 2262 debug_atomic_inc(&redundant_softirqs_off);
2318} 2263}
2319 2264
2265void lockdep_trace_alloc(gfp_t gfp_mask)
2266{
2267 struct task_struct *curr = current;
2268
2269 if (unlikely(!debug_locks))
2270 return;
2271
2272 /* no reclaim without waiting on it */
2273 if (!(gfp_mask & __GFP_WAIT))
2274 return;
2275
2276 /* this guy won't enter reclaim */
2277 if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
2278 return;
2279
2280 /* We're only interested __GFP_FS allocations for now */
2281 if (!(gfp_mask & __GFP_FS))
2282 return;
2283
2284 if (DEBUG_LOCKS_WARN_ON(irqs_disabled()))
2285 return;
2286
2287 mark_held_locks(curr, RECLAIM_FS);
2288}
2289
2320static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) 2290static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
2321{ 2291{
2322 /* 2292 /*
@@ -2345,19 +2315,35 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
2345 if (!hlock->hardirqs_off) { 2315 if (!hlock->hardirqs_off) {
2346 if (hlock->read) { 2316 if (hlock->read) {
2347 if (!mark_lock(curr, hlock, 2317 if (!mark_lock(curr, hlock,
2348 LOCK_ENABLED_HARDIRQS_READ)) 2318 LOCK_ENABLED_HARDIRQ_READ))
2349 return 0; 2319 return 0;
2350 if (curr->softirqs_enabled) 2320 if (curr->softirqs_enabled)
2351 if (!mark_lock(curr, hlock, 2321 if (!mark_lock(curr, hlock,
2352 LOCK_ENABLED_SOFTIRQS_READ)) 2322 LOCK_ENABLED_SOFTIRQ_READ))
2353 return 0; 2323 return 0;
2354 } else { 2324 } else {
2355 if (!mark_lock(curr, hlock, 2325 if (!mark_lock(curr, hlock,
2356 LOCK_ENABLED_HARDIRQS)) 2326 LOCK_ENABLED_HARDIRQ))
2357 return 0; 2327 return 0;
2358 if (curr->softirqs_enabled) 2328 if (curr->softirqs_enabled)
2359 if (!mark_lock(curr, hlock, 2329 if (!mark_lock(curr, hlock,
2360 LOCK_ENABLED_SOFTIRQS)) 2330 LOCK_ENABLED_SOFTIRQ))
2331 return 0;
2332 }
2333 }
2334
2335 /*
2336 * We reuse the irq context infrastructure more broadly as a general
2337 * context checking code. This tests GFP_FS recursion (a lock taken
2338 * during reclaim for a GFP_FS allocation is held over a GFP_FS
2339 * allocation).
2340 */
2341 if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) {
2342 if (hlock->read) {
2343 if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ))
2344 return 0;
2345 } else {
2346 if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS))
2361 return 0; 2347 return 0;
2362 } 2348 }
2363 } 2349 }
@@ -2412,6 +2398,10 @@ static inline int separate_irq_context(struct task_struct *curr,
2412 return 0; 2398 return 0;
2413} 2399}
2414 2400
2401void lockdep_trace_alloc(gfp_t gfp_mask)
2402{
2403}
2404
2415#endif 2405#endif
2416 2406
2417/* 2407/*
@@ -2445,14 +2435,13 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
2445 return 0; 2435 return 0;
2446 2436
2447 switch (new_bit) { 2437 switch (new_bit) {
2448 case LOCK_USED_IN_HARDIRQ: 2438#define LOCKDEP_STATE(__STATE) \
2449 case LOCK_USED_IN_SOFTIRQ: 2439 case LOCK_USED_IN_##__STATE: \
2450 case LOCK_USED_IN_HARDIRQ_READ: 2440 case LOCK_USED_IN_##__STATE##_READ: \
2451 case LOCK_USED_IN_SOFTIRQ_READ: 2441 case LOCK_ENABLED_##__STATE: \
2452 case LOCK_ENABLED_HARDIRQS: 2442 case LOCK_ENABLED_##__STATE##_READ:
2453 case LOCK_ENABLED_SOFTIRQS: 2443#include "lockdep_states.h"
2454 case LOCK_ENABLED_HARDIRQS_READ: 2444#undef LOCKDEP_STATE
2455 case LOCK_ENABLED_SOFTIRQS_READ:
2456 ret = mark_lock_irq(curr, this, new_bit); 2445 ret = mark_lock_irq(curr, this, new_bit);
2457 if (!ret) 2446 if (!ret)
2458 return 0; 2447 return 0;
@@ -2925,6 +2914,8 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
2925} 2914}
2926EXPORT_SYMBOL_GPL(lock_set_class); 2915EXPORT_SYMBOL_GPL(lock_set_class);
2927 2916
2917DEFINE_TRACE(lock_acquire);
2918
2928/* 2919/*
2929 * We are not always called with irqs disabled - do that here, 2920 * We are not always called with irqs disabled - do that here,
2930 * and also avoid lockdep recursion: 2921 * and also avoid lockdep recursion:
@@ -2935,6 +2926,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2935{ 2926{
2936 unsigned long flags; 2927 unsigned long flags;
2937 2928
2929 trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
2930
2938 if (unlikely(current->lockdep_recursion)) 2931 if (unlikely(current->lockdep_recursion))
2939 return; 2932 return;
2940 2933
@@ -2949,11 +2942,15 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2949} 2942}
2950EXPORT_SYMBOL_GPL(lock_acquire); 2943EXPORT_SYMBOL_GPL(lock_acquire);
2951 2944
2945DEFINE_TRACE(lock_release);
2946
2952void lock_release(struct lockdep_map *lock, int nested, 2947void lock_release(struct lockdep_map *lock, int nested,
2953 unsigned long ip) 2948 unsigned long ip)
2954{ 2949{
2955 unsigned long flags; 2950 unsigned long flags;
2956 2951
2952 trace_lock_release(lock, nested, ip);
2953
2957 if (unlikely(current->lockdep_recursion)) 2954 if (unlikely(current->lockdep_recursion))
2958 return; 2955 return;
2959 2956
@@ -2966,6 +2963,16 @@ void lock_release(struct lockdep_map *lock, int nested,
2966} 2963}
2967EXPORT_SYMBOL_GPL(lock_release); 2964EXPORT_SYMBOL_GPL(lock_release);
2968 2965
2966void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
2967{
2968 current->lockdep_reclaim_gfp = gfp_mask;
2969}
2970
2971void lockdep_clear_current_reclaim_state(void)
2972{
2973 current->lockdep_reclaim_gfp = 0;
2974}
2975
2969#ifdef CONFIG_LOCK_STAT 2976#ifdef CONFIG_LOCK_STAT
2970static int 2977static int
2971print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, 2978print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
@@ -3092,10 +3099,14 @@ found_it:
3092 lock->ip = ip; 3099 lock->ip = ip;
3093} 3100}
3094 3101
3102DEFINE_TRACE(lock_contended);
3103
3095void lock_contended(struct lockdep_map *lock, unsigned long ip) 3104void lock_contended(struct lockdep_map *lock, unsigned long ip)
3096{ 3105{
3097 unsigned long flags; 3106 unsigned long flags;
3098 3107
3108 trace_lock_contended(lock, ip);
3109
3099 if (unlikely(!lock_stat)) 3110 if (unlikely(!lock_stat))
3100 return; 3111 return;
3101 3112
@@ -3111,10 +3122,14 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3111} 3122}
3112EXPORT_SYMBOL_GPL(lock_contended); 3123EXPORT_SYMBOL_GPL(lock_contended);
3113 3124
3125DEFINE_TRACE(lock_acquired);
3126
3114void lock_acquired(struct lockdep_map *lock, unsigned long ip) 3127void lock_acquired(struct lockdep_map *lock, unsigned long ip)
3115{ 3128{
3116 unsigned long flags; 3129 unsigned long flags;
3117 3130
3131 trace_lock_acquired(lock, ip);
3132
3118 if (unlikely(!lock_stat)) 3133 if (unlikely(!lock_stat))
3119 return; 3134 return;
3120 3135
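The bit layout documented in exclusive_bit() above (bit 0 = read, bit 1 = used_in/enabled, bits 2 and up = state) turns the safe/unsafe pairing into plain arithmetic. The standalone sketch below reproduces the enum layout from kernel/lockdep_internals.h for the three states and checks a few of the resulting identities.

#include <assert.h>
#include <stdio.h>

/* Per-state group of 4 bits, states ordered HARDIRQ, SOFTIRQ, RECLAIM_FS. */
enum lock_usage_bit {
	LOCK_USED_IN_HARDIRQ,		/* 0 */
	LOCK_USED_IN_HARDIRQ_READ,	/* 1 */
	LOCK_ENABLED_HARDIRQ,		/* 2 */
	LOCK_ENABLED_HARDIRQ_READ,	/* 3 */
	LOCK_USED_IN_SOFTIRQ,		/* 4 */
	LOCK_USED_IN_SOFTIRQ_READ,
	LOCK_ENABLED_SOFTIRQ,
	LOCK_ENABLED_SOFTIRQ_READ,
	LOCK_USED_IN_RECLAIM_FS,	/* 8 */
	LOCK_USED_IN_RECLAIM_FS_READ,
	LOCK_ENABLED_RECLAIM_FS,
	LOCK_ENABLED_RECLAIM_FS_READ,
	LOCK_USED,
	LOCK_USAGE_STATES
};

/* Same arithmetic as exclusive_bit() in the patch. */
static int exclusive_bit(int new_bit)
{
	int state = new_bit & ~3;	/* keep the state */
	int dir = new_bit & 2;		/* used_in vs enabled */

	return state | (dir ^ 2);	/* flip direction, strip read */
}

int main(void)
{
	/* USED_IN pairs with ENABLED of the same state, and vice versa. */
	assert(exclusive_bit(LOCK_USED_IN_HARDIRQ) == LOCK_ENABLED_HARDIRQ);
	assert(exclusive_bit(LOCK_USED_IN_SOFTIRQ_READ) == LOCK_ENABLED_SOFTIRQ);
	assert(exclusive_bit(LOCK_ENABLED_RECLAIM_FS) == LOCK_USED_IN_RECLAIM_FS);

	/* mark_held_locks(): usage_bit = 2 + (mark << 2) selects ENABLED. */
	assert(2 + (1 /* SOFTIRQ */ << 2) == LOCK_ENABLED_SOFTIRQ);

	printf("state arithmetic holds for %d usage states\n", LOCK_USAGE_STATES);
	return 0;
}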
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index 56b196932c0..a2cc7e9a6e8 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -7,6 +7,45 @@
7 */ 7 */
8 8
9/* 9/*
10 * Lock-class usage-state bits:
11 */
12enum lock_usage_bit {
13#define LOCKDEP_STATE(__STATE) \
14 LOCK_USED_IN_##__STATE, \
15 LOCK_USED_IN_##__STATE##_READ, \
16 LOCK_ENABLED_##__STATE, \
17 LOCK_ENABLED_##__STATE##_READ,
18#include "lockdep_states.h"
19#undef LOCKDEP_STATE
20 LOCK_USED,
21 LOCK_USAGE_STATES
22};
23
24/*
25 * Usage-state bitmasks:
26 */
27#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE),
28
29enum {
30#define LOCKDEP_STATE(__STATE) \
31 __LOCKF(USED_IN_##__STATE) \
32 __LOCKF(USED_IN_##__STATE##_READ) \
33 __LOCKF(ENABLED_##__STATE) \
34 __LOCKF(ENABLED_##__STATE##_READ)
35#include "lockdep_states.h"
36#undef LOCKDEP_STATE
37 __LOCKF(USED)
38};
39
40#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
41#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
42
43#define LOCKF_ENABLED_IRQ_READ \
44 (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
45#define LOCKF_USED_IN_IRQ_READ \
46 (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
47
48/*
10 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies 49 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
11 * we track. 50 * we track.
12 * 51 *
@@ -31,8 +70,10 @@
31extern struct list_head all_lock_classes; 70extern struct list_head all_lock_classes;
32extern struct lock_chain lock_chains[]; 71extern struct lock_chain lock_chains[];
33 72
34extern void 73#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2)
35get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4); 74
75extern void get_usage_chars(struct lock_class *class,
76 char usage[LOCK_USAGE_CHARS]);
36 77
37extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); 78extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
38 79
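The enum and the LOCKF_ masks above are generated by including kernel/lockdep_states.h repeatedly with different LOCKDEP_STATE() definitions. The self-contained sketch below shows the same technique; since a single snippet cannot ship a separate header, a LOCKDEP_STATES list macro stands in for the include file.

#include <stdio.h>

/* Stand-in for #include "lockdep_states.h" (one entry per state). */
#define LOCKDEP_STATES(STATE)	\
	STATE(HARDIRQ)		\
	STATE(SOFTIRQ)		\
	STATE(RECLAIM_FS)

/* First expansion: the usage-bit enum, four bits per state. */
enum lock_usage_bit {
#define LOCKDEP_STATE(__STATE)		\
	LOCK_USED_IN_##__STATE,		\
	LOCK_USED_IN_##__STATE##_READ,	\
	LOCK_ENABLED_##__STATE,		\
	LOCK_ENABLED_##__STATE##_READ,
	LOCKDEP_STATES(LOCKDEP_STATE)
#undef LOCKDEP_STATE
	LOCK_USED,
	LOCK_USAGE_STATES
};

/* Second expansion: the corresponding bitmasks. */
#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE),
enum {
#define LOCKDEP_STATE(__STATE)			\
	__LOCKF(USED_IN_##__STATE)		\
	__LOCKF(USED_IN_##__STATE##_READ)	\
	__LOCKF(ENABLED_##__STATE)		\
	__LOCKF(ENABLED_##__STATE##_READ)
	LOCKDEP_STATES(LOCKDEP_STATE)
#undef LOCKDEP_STATE
	__LOCKF(USED)
};

int main(void)
{
	printf("%d usage states, LOCKF_ENABLED_SOFTIRQ = 0x%x\n",
	       LOCK_USAGE_STATES, LOCKF_ENABLED_SOFTIRQ);
	return 0;
}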
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 13716b81389..d7135aa2d2c 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -84,7 +84,7 @@ static int l_show(struct seq_file *m, void *v)
84{ 84{
85 struct lock_class *class = v; 85 struct lock_class *class = v;
86 struct lock_list *entry; 86 struct lock_list *entry;
87 char c1, c2, c3, c4; 87 char usage[LOCK_USAGE_CHARS];
88 88
89 if (v == SEQ_START_TOKEN) { 89 if (v == SEQ_START_TOKEN) {
90 seq_printf(m, "all lock classes:\n"); 90 seq_printf(m, "all lock classes:\n");
@@ -100,8 +100,8 @@ static int l_show(struct seq_file *m, void *v)
100 seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class)); 100 seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class));
101#endif 101#endif
102 102
103 get_usage_chars(class, &c1, &c2, &c3, &c4); 103 get_usage_chars(class, usage);
104 seq_printf(m, " %c%c%c%c", c1, c2, c3, c4); 104 seq_printf(m, " %s", usage);
105 105
106 seq_printf(m, ": "); 106 seq_printf(m, ": ");
107 print_name(m, class); 107 print_name(m, class);
@@ -300,27 +300,27 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
300 nr_uncategorized++; 300 nr_uncategorized++;
301 if (class->usage_mask & LOCKF_USED_IN_IRQ) 301 if (class->usage_mask & LOCKF_USED_IN_IRQ)
302 nr_irq_safe++; 302 nr_irq_safe++;
303 if (class->usage_mask & LOCKF_ENABLED_IRQS) 303 if (class->usage_mask & LOCKF_ENABLED_IRQ)
304 nr_irq_unsafe++; 304 nr_irq_unsafe++;
305 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) 305 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
306 nr_softirq_safe++; 306 nr_softirq_safe++;
307 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS) 307 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ)
308 nr_softirq_unsafe++; 308 nr_softirq_unsafe++;
309 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) 309 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
310 nr_hardirq_safe++; 310 nr_hardirq_safe++;
311 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) 311 if (class->usage_mask & LOCKF_ENABLED_HARDIRQ)
312 nr_hardirq_unsafe++; 312 nr_hardirq_unsafe++;
313 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ) 313 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
314 nr_irq_read_safe++; 314 nr_irq_read_safe++;
315 if (class->usage_mask & LOCKF_ENABLED_IRQS_READ) 315 if (class->usage_mask & LOCKF_ENABLED_IRQ_READ)
316 nr_irq_read_unsafe++; 316 nr_irq_read_unsafe++;
317 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) 317 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
318 nr_softirq_read_safe++; 318 nr_softirq_read_safe++;
319 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) 319 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ)
320 nr_softirq_read_unsafe++; 320 nr_softirq_read_unsafe++;
321 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) 321 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
322 nr_hardirq_read_safe++; 322 nr_hardirq_read_safe++;
323 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 323 if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ)
324 nr_hardirq_read_unsafe++; 324 nr_hardirq_read_unsafe++;
325 325
326#ifdef CONFIG_PROVE_LOCKING 326#ifdef CONFIG_PROVE_LOCKING
@@ -601,6 +601,10 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
601static void seq_header(struct seq_file *m) 601static void seq_header(struct seq_file *m)
602{ 602{
603 seq_printf(m, "lock_stat version 0.3\n"); 603 seq_printf(m, "lock_stat version 0.3\n");
604
605 if (unlikely(!debug_locks))
606 seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n");
607
604 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); 608 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
605 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " 609 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
606 "%14s %14s\n", 610 "%14s %14s\n",
diff --git a/kernel/lockdep_states.h b/kernel/lockdep_states.h
new file mode 100644
index 00000000000..995b0cc2b84
--- /dev/null
+++ b/kernel/lockdep_states.h
@@ -0,0 +1,9 @@
+/*
+ * Lockdep states,
+ *
+ * please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever
+ * you add one, or come up with a nice dynamic solution.
+ */
+LOCKDEP_STATE(HARDIRQ)
+LOCKDEP_STATE(SOFTIRQ)
+LOCKDEP_STATE(RECLAIM_FS)
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 1d94160eb53..50d022e5a56 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -26,11 +26,6 @@
26/* 26/*
27 * Must be called with lock->wait_lock held. 27 * Must be called with lock->wait_lock held.
28 */ 28 */
29void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
30{
31 lock->owner = new_owner;
32}
33
34void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) 29void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
35{ 30{
36 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); 31 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
@@ -59,7 +54,6 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
59 54
60 /* Mark the current thread as blocked on the lock: */ 55 /* Mark the current thread as blocked on the lock: */
61 ti->task->blocked_on = waiter; 56 ti->task->blocked_on = waiter;
62 waiter->lock = lock;
63} 57}
64 58
65void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 59void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
@@ -82,7 +76,7 @@ void debug_mutex_unlock(struct mutex *lock)
82 DEBUG_LOCKS_WARN_ON(lock->magic != lock); 76 DEBUG_LOCKS_WARN_ON(lock->magic != lock);
83 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); 77 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
84 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 78 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
85 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); 79 mutex_clear_owner(lock);
86} 80}
87 81
88void debug_mutex_init(struct mutex *lock, const char *name, 82void debug_mutex_init(struct mutex *lock, const char *name,
@@ -95,7 +89,6 @@ void debug_mutex_init(struct mutex *lock, const char *name,
95 debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 89 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
96 lockdep_init_map(&lock->dep_map, name, key, 0); 90 lockdep_init_map(&lock->dep_map, name, key, 0);
97#endif 91#endif
98 lock->owner = NULL;
99 lock->magic = lock; 92 lock->magic = lock;
100} 93}
101 94
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index babfbdfc534..6b2d735846a 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -13,14 +13,6 @@
13/* 13/*
14 * This must be called with lock->wait_lock held. 14 * This must be called with lock->wait_lock held.
15 */ 15 */
16extern void
17debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
18
19static inline void debug_mutex_clear_owner(struct mutex *lock)
20{
21 lock->owner = NULL;
22}
23
24extern void debug_mutex_lock_common(struct mutex *lock, 16extern void debug_mutex_lock_common(struct mutex *lock,
25 struct mutex_waiter *waiter); 17 struct mutex_waiter *waiter);
26extern void debug_mutex_wake_waiter(struct mutex *lock, 18extern void debug_mutex_wake_waiter(struct mutex *lock,
@@ -35,6 +27,16 @@ extern void debug_mutex_unlock(struct mutex *lock);
35extern void debug_mutex_init(struct mutex *lock, const char *name, 27extern void debug_mutex_init(struct mutex *lock, const char *name,
36 struct lock_class_key *key); 28 struct lock_class_key *key);
37 29
30static inline void mutex_set_owner(struct mutex *lock)
31{
32 lock->owner = current_thread_info();
33}
34
35static inline void mutex_clear_owner(struct mutex *lock)
36{
37 lock->owner = NULL;
38}
39
38#define spin_lock_mutex(lock, flags) \ 40#define spin_lock_mutex(lock, flags) \
39 do { \ 41 do { \
40 struct mutex *l = container_of(lock, struct mutex, wait_lock); \ 42 struct mutex *l = container_of(lock, struct mutex, wait_lock); \
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 4f45d4b658e..5d79781394a 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -10,6 +10,11 @@
10 * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and 10 * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
11 * David Howells for suggestions and improvements. 11 * David Howells for suggestions and improvements.
12 * 12 *
13 * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline
14 * from the -rt tree, where it was originally implemented for rtmutexes
15 * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
16 * and Sven Dietrich.
17 *
13 * Also see Documentation/mutex-design.txt. 18 * Also see Documentation/mutex-design.txt.
14 */ 19 */
15#include <linux/mutex.h> 20#include <linux/mutex.h>
@@ -46,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
46 atomic_set(&lock->count, 1); 51 atomic_set(&lock->count, 1);
47 spin_lock_init(&lock->wait_lock); 52 spin_lock_init(&lock->wait_lock);
48 INIT_LIST_HEAD(&lock->wait_list); 53 INIT_LIST_HEAD(&lock->wait_list);
54 mutex_clear_owner(lock);
49 55
50 debug_mutex_init(lock, name, key); 56 debug_mutex_init(lock, name, key);
51} 57}
@@ -91,6 +97,7 @@ void inline __sched mutex_lock(struct mutex *lock)
91 * 'unlocked' into 'locked' state. 97 * 'unlocked' into 'locked' state.
92 */ 98 */
93 __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); 99 __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
100 mutex_set_owner(lock);
94} 101}
95 102
96EXPORT_SYMBOL(mutex_lock); 103EXPORT_SYMBOL(mutex_lock);
@@ -115,6 +122,14 @@ void __sched mutex_unlock(struct mutex *lock)
115 * The unlocking fastpath is the 0->1 transition from 'locked' 122 * The unlocking fastpath is the 0->1 transition from 'locked'
116 * into 'unlocked' state: 123 * into 'unlocked' state:
117 */ 124 */
125#ifndef CONFIG_DEBUG_MUTEXES
126 /*
 127	 * When debugging is enabled we must not clear the owner prematurely:
 128	 * the slow path will always be taken, and it clears the owner field
 129	 * after verifying that it was indeed current.
130 */
131 mutex_clear_owner(lock);
132#endif
118 __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); 133 __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
119} 134}
120 135
@@ -129,21 +144,75 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
129{ 144{
130 struct task_struct *task = current; 145 struct task_struct *task = current;
131 struct mutex_waiter waiter; 146 struct mutex_waiter waiter;
132 unsigned int old_val;
133 unsigned long flags; 147 unsigned long flags;
134 148
149 preempt_disable();
150 mutex_acquire(&lock->dep_map, subclass, 0, ip);
151#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES)
152 /*
153 * Optimistic spinning.
154 *
155 * We try to spin for acquisition when we find that there are no
156 * pending waiters and the lock owner is currently running on a
157 * (different) CPU.
158 *
159 * The rationale is that if the lock owner is running, it is likely to
160 * release the lock soon.
161 *
162 * Since this needs the lock owner, and this mutex implementation
163 * doesn't track the owner atomically in the lock field, we need to
164 * track it non-atomically.
165 *
166 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
167 * to serialize everything.
168 */
169
170 for (;;) {
171 struct thread_info *owner;
172
173 /*
174 * If there's an owner, wait for it to either
175 * release the lock or go to sleep.
176 */
177 owner = ACCESS_ONCE(lock->owner);
178 if (owner && !mutex_spin_on_owner(lock, owner))
179 break;
180
181 if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
182 lock_acquired(&lock->dep_map, ip);
183 mutex_set_owner(lock);
184 preempt_enable();
185 return 0;
186 }
187
188 /*
189 * When there's no owner, we might have preempted between the
190 * owner acquiring the lock and setting the owner field. If
 191	 * we're an RT task, we will live-lock because we won't let
192 * the owner complete.
193 */
194 if (!owner && (need_resched() || rt_task(task)))
195 break;
196
197 /*
198 * The cpu_relax() call is a compiler barrier which forces
199 * everything in this loop to be re-loaded. We don't need
200 * memory barriers as we'll eventually observe the right
201 * values at the cost of a few extra spins.
202 */
203 cpu_relax();
204 }
205#endif
135 spin_lock_mutex(&lock->wait_lock, flags); 206 spin_lock_mutex(&lock->wait_lock, flags);
136 207
137 debug_mutex_lock_common(lock, &waiter); 208 debug_mutex_lock_common(lock, &waiter);
138 mutex_acquire(&lock->dep_map, subclass, 0, ip);
139 debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); 209 debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
140 210
141 /* add waiting tasks to the end of the waitqueue (FIFO): */ 211 /* add waiting tasks to the end of the waitqueue (FIFO): */
142 list_add_tail(&waiter.list, &lock->wait_list); 212 list_add_tail(&waiter.list, &lock->wait_list);
143 waiter.task = task; 213 waiter.task = task;
144 214
145 old_val = atomic_xchg(&lock->count, -1); 215 if (atomic_xchg(&lock->count, -1) == 1)
146 if (old_val == 1)
147 goto done; 216 goto done;
148 217
149 lock_contended(&lock->dep_map, ip); 218 lock_contended(&lock->dep_map, ip);
@@ -158,8 +227,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
158 * that when we release the lock, we properly wake up the 227 * that when we release the lock, we properly wake up the
159 * other waiters: 228 * other waiters:
160 */ 229 */
161 old_val = atomic_xchg(&lock->count, -1); 230 if (atomic_xchg(&lock->count, -1) == 1)
162 if (old_val == 1)
163 break; 231 break;
164 232
165 /* 233 /*
@@ -173,21 +241,22 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
173 spin_unlock_mutex(&lock->wait_lock, flags); 241 spin_unlock_mutex(&lock->wait_lock, flags);
174 242
175 debug_mutex_free_waiter(&waiter); 243 debug_mutex_free_waiter(&waiter);
244 preempt_enable();
176 return -EINTR; 245 return -EINTR;
177 } 246 }
178 __set_task_state(task, state); 247 __set_task_state(task, state);
179 248
 180	 /* didn't get the lock, go to sleep: */	 249
181 spin_unlock_mutex(&lock->wait_lock, flags); 250 spin_unlock_mutex(&lock->wait_lock, flags);
182 schedule(); 251 __schedule();
183 spin_lock_mutex(&lock->wait_lock, flags); 252 spin_lock_mutex(&lock->wait_lock, flags);
184 } 253 }
185 254
186done: 255done:
187 lock_acquired(&lock->dep_map, ip); 256 lock_acquired(&lock->dep_map, ip);
188 /* got the lock - rejoice! */ 257 /* got the lock - rejoice! */
189 mutex_remove_waiter(lock, &waiter, task_thread_info(task)); 258 mutex_remove_waiter(lock, &waiter, current_thread_info());
190 debug_mutex_set_owner(lock, task_thread_info(task)); 259 mutex_set_owner(lock);
191 260
192 /* set it to 0 if there are no waiters left: */ 261 /* set it to 0 if there are no waiters left: */
193 if (likely(list_empty(&lock->wait_list))) 262 if (likely(list_empty(&lock->wait_list)))
@@ -196,6 +265,7 @@ done:
196 spin_unlock_mutex(&lock->wait_lock, flags); 265 spin_unlock_mutex(&lock->wait_lock, flags);
197 266
198 debug_mutex_free_waiter(&waiter); 267 debug_mutex_free_waiter(&waiter);
268 preempt_enable();
199 269
200 return 0; 270 return 0;
201} 271}
@@ -222,7 +292,8 @@ int __sched
222mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) 292mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
223{ 293{
224 might_sleep(); 294 might_sleep();
225 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_); 295 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
296 subclass, _RET_IP_);
226} 297}
227 298
228EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); 299EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -260,8 +331,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
260 wake_up_process(waiter->task); 331 wake_up_process(waiter->task);
261 } 332 }
262 333
263 debug_mutex_clear_owner(lock);
264
265 spin_unlock_mutex(&lock->wait_lock, flags); 334 spin_unlock_mutex(&lock->wait_lock, flags);
266} 335}
267 336
@@ -298,18 +367,30 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count);
298 */ 367 */
299int __sched mutex_lock_interruptible(struct mutex *lock) 368int __sched mutex_lock_interruptible(struct mutex *lock)
300{ 369{
370 int ret;
371
301 might_sleep(); 372 might_sleep();
302 return __mutex_fastpath_lock_retval 373 ret = __mutex_fastpath_lock_retval
303 (&lock->count, __mutex_lock_interruptible_slowpath); 374 (&lock->count, __mutex_lock_interruptible_slowpath);
375 if (!ret)
376 mutex_set_owner(lock);
377
378 return ret;
304} 379}
305 380
306EXPORT_SYMBOL(mutex_lock_interruptible); 381EXPORT_SYMBOL(mutex_lock_interruptible);
307 382
308int __sched mutex_lock_killable(struct mutex *lock) 383int __sched mutex_lock_killable(struct mutex *lock)
309{ 384{
385 int ret;
386
310 might_sleep(); 387 might_sleep();
311 return __mutex_fastpath_lock_retval 388 ret = __mutex_fastpath_lock_retval
312 (&lock->count, __mutex_lock_killable_slowpath); 389 (&lock->count, __mutex_lock_killable_slowpath);
390 if (!ret)
391 mutex_set_owner(lock);
392
393 return ret;
313} 394}
314EXPORT_SYMBOL(mutex_lock_killable); 395EXPORT_SYMBOL(mutex_lock_killable);
315 396
@@ -352,9 +433,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
352 433
353 prev = atomic_xchg(&lock->count, -1); 434 prev = atomic_xchg(&lock->count, -1);
354 if (likely(prev == 1)) { 435 if (likely(prev == 1)) {
355 debug_mutex_set_owner(lock, current_thread_info()); 436 mutex_set_owner(lock);
356 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); 437 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
357 } 438 }
439
358 /* Set it back to 0 if there are no waiters: */ 440 /* Set it back to 0 if there are no waiters: */
359 if (likely(list_empty(&lock->wait_list))) 441 if (likely(list_empty(&lock->wait_list)))
360 atomic_set(&lock->count, 0); 442 atomic_set(&lock->count, 0);
@@ -380,8 +462,13 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
380 */ 462 */
381int __sched mutex_trylock(struct mutex *lock) 463int __sched mutex_trylock(struct mutex *lock)
382{ 464{
383 return __mutex_fastpath_trylock(&lock->count, 465 int ret;
384 __mutex_trylock_slowpath); 466
467 ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
468 if (ret)
469 mutex_set_owner(lock);
470
471 return ret;
385} 472}
386 473
387EXPORT_SYMBOL(mutex_trylock); 474EXPORT_SYMBOL(mutex_trylock);
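
The optimistic-spinning fast path added to __mutex_lock_common() above is easier to see in isolation. Below is a minimal userspace sketch of the same idea, with C11 atomics and sched_yield() standing in for the kernel's atomic ops, cpu_relax() and blocking slow path. The names (simple_mutex, owner_is_running, my_id) are made up for illustration, and the owner_is_running() stub always answers "yes", so this shows only the control flow, not a usable lock.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <sched.h>

	struct simple_mutex {
		atomic_int  count;	/* 1 == unlocked, 0 == locked, as in struct mutex */
		atomic_long owner;	/* id of the current holder, 0 == none */
	};

	/*
	 * Stand-in for mutex_spin_on_owner(): report whether the holder is
	 * still on a CPU.  Userspace has no cheap way to know, so this stub
	 * always says "yes"; the kernel checks rq->curr on the owner's CPU.
	 */
	static bool owner_is_running(long owner)
	{
		(void)owner;
		return true;
	}

	static void simple_mutex_lock(struct simple_mutex *m, long my_id)
	{
		for (;;) {
			long owner = atomic_load(&m->owner);
			int unlocked = 1;

			/* Fast path: 1 -> 0 means we took the lock. */
			if (atomic_compare_exchange_strong(&m->count, &unlocked, 0)) {
				atomic_store(&m->owner, my_id);
				return;
			}

			/*
			 * Held by someone else: keep spinning only while the
			 * owner looks like it is running; otherwise give up
			 * the CPU (the real slow path blocks on a wait list).
			 */
			if (!owner || !owner_is_running(owner))
				sched_yield();
		}
	}

	static void simple_mutex_unlock(struct simple_mutex *m)
	{
		atomic_store(&m->owner, 0);
		atomic_store(&m->count, 1);
	}

	int main(void)
	{
		struct simple_mutex m = { 1, 0 };

		simple_mutex_lock(&m, 42);
		simple_mutex_unlock(&m);
		return 0;
	}
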
diff --git a/kernel/mutex.h b/kernel/mutex.h
index a075dafbb29..67578ca48f9 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,8 +16,26 @@
16#define mutex_remove_waiter(lock, waiter, ti) \ 16#define mutex_remove_waiter(lock, waiter, ti) \
17 __list_del((waiter)->list.prev, (waiter)->list.next) 17 __list_del((waiter)->list.prev, (waiter)->list.next)
18 18
19#define debug_mutex_set_owner(lock, new_owner) do { } while (0) 19#ifdef CONFIG_SMP
20#define debug_mutex_clear_owner(lock) do { } while (0) 20static inline void mutex_set_owner(struct mutex *lock)
21{
22 lock->owner = current_thread_info();
23}
24
25static inline void mutex_clear_owner(struct mutex *lock)
26{
27 lock->owner = NULL;
28}
29#else
30static inline void mutex_set_owner(struct mutex *lock)
31{
32}
33
34static inline void mutex_clear_owner(struct mutex *lock)
35{
36}
37#endif
38
21#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) 39#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
22#define debug_mutex_free_waiter(waiter) do { } while (0) 40#define debug_mutex_free_waiter(waiter) do { } while (0)
23#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) 41#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
diff --git a/kernel/sched.c b/kernel/sched.c
index 328f9c7448a..e1f676e2011 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4543,15 +4543,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
4543/* 4543/*
4544 * schedule() is the main scheduler function. 4544 * schedule() is the main scheduler function.
4545 */ 4545 */
4546asmlinkage void __sched schedule(void) 4546asmlinkage void __sched __schedule(void)
4547{ 4547{
4548 struct task_struct *prev, *next; 4548 struct task_struct *prev, *next;
4549 unsigned long *switch_count; 4549 unsigned long *switch_count;
4550 struct rq *rq; 4550 struct rq *rq;
4551 int cpu; 4551 int cpu;
4552 4552
4553need_resched:
4554 preempt_disable();
4555 cpu = smp_processor_id(); 4553 cpu = smp_processor_id();
4556 rq = cpu_rq(cpu); 4554 rq = cpu_rq(cpu);
4557 rcu_qsctr_inc(cpu); 4555 rcu_qsctr_inc(cpu);
@@ -4608,13 +4606,80 @@ need_resched_nonpreemptible:
4608 4606
4609 if (unlikely(reacquire_kernel_lock(current) < 0)) 4607 if (unlikely(reacquire_kernel_lock(current) < 0))
4610 goto need_resched_nonpreemptible; 4608 goto need_resched_nonpreemptible;
4609}
4611 4610
4611asmlinkage void __sched schedule(void)
4612{
4613need_resched:
4614 preempt_disable();
4615 __schedule();
4612 preempt_enable_no_resched(); 4616 preempt_enable_no_resched();
4613 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 4617 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
4614 goto need_resched; 4618 goto need_resched;
4615} 4619}
4616EXPORT_SYMBOL(schedule); 4620EXPORT_SYMBOL(schedule);
4617 4621
4622#ifdef CONFIG_SMP
4623/*
4624 * Look out! "owner" is an entirely speculative pointer
4625 * access and not reliable.
4626 */
4627int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
4628{
4629 unsigned int cpu;
4630 struct rq *rq;
4631
4632 if (!sched_feat(OWNER_SPIN))
4633 return 0;
4634
4635#ifdef CONFIG_DEBUG_PAGEALLOC
4636 /*
4637 * Need to access the cpu field knowing that
4638 * DEBUG_PAGEALLOC could have unmapped it if
4639 * the mutex owner just released it and exited.
4640 */
4641 if (probe_kernel_address(&owner->cpu, cpu))
4642 goto out;
4643#else
4644 cpu = owner->cpu;
4645#endif
4646
4647 /*
4648 * Even if the access succeeded (likely case),
4649 * the cpu field may no longer be valid.
4650 */
4651 if (cpu >= nr_cpumask_bits)
4652 goto out;
4653
4654 /*
4655 * We need to validate that we can do a
4656 * get_cpu() and that we have the percpu area.
4657 */
4658 if (!cpu_online(cpu))
4659 goto out;
4660
4661 rq = cpu_rq(cpu);
4662
4663 for (;;) {
4664 /*
4665 * Owner changed, break to re-assess state.
4666 */
4667 if (lock->owner != owner)
4668 break;
4669
4670 /*
4671 * Is that owner really running on that cpu?
4672 */
4673 if (task_thread_info(rq->curr) != owner || need_resched())
4674 return 0;
4675
4676 cpu_relax();
4677 }
4678out:
4679 return 1;
4680}
4681#endif
4682
4618#ifdef CONFIG_PREEMPT 4683#ifdef CONFIG_PREEMPT
4619/* 4684/*
4620 * this is the entry point to schedule() from in-kernel preemption 4685 * this is the entry point to schedule() from in-kernel preemption
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index da5d93b5d2c..07bc02e99ab 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -13,3 +13,4 @@ SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
13SCHED_FEAT(ASYM_EFF_LOAD, 1) 13SCHED_FEAT(ASYM_EFF_LOAD, 1)
14SCHED_FEAT(WAKEUP_OVERLAP, 0) 14SCHED_FEAT(WAKEUP_OVERLAP, 0)
15SCHED_FEAT(LAST_BUDDY, 1) 15SCHED_FEAT(LAST_BUDDY, 1)
16SCHED_FEAT(OWNER_SPIN, 1)
diff --git a/kernel/timer.c b/kernel/timer.c
index 13dd64fe143..ef1c385bc57 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -491,14 +491,18 @@ static inline void debug_timer_free(struct timer_list *timer)
491 debug_object_free(timer, &timer_debug_descr); 491 debug_object_free(timer, &timer_debug_descr);
492} 492}
493 493
494static void __init_timer(struct timer_list *timer); 494static void __init_timer(struct timer_list *timer,
495 const char *name,
496 struct lock_class_key *key);
495 497
496void init_timer_on_stack(struct timer_list *timer) 498void init_timer_on_stack_key(struct timer_list *timer,
499 const char *name,
500 struct lock_class_key *key)
497{ 501{
498 debug_object_init_on_stack(timer, &timer_debug_descr); 502 debug_object_init_on_stack(timer, &timer_debug_descr);
499 __init_timer(timer); 503 __init_timer(timer, name, key);
500} 504}
501EXPORT_SYMBOL_GPL(init_timer_on_stack); 505EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
502 506
503void destroy_timer_on_stack(struct timer_list *timer) 507void destroy_timer_on_stack(struct timer_list *timer)
504{ 508{
@@ -512,7 +516,9 @@ static inline void debug_timer_activate(struct timer_list *timer) { }
512static inline void debug_timer_deactivate(struct timer_list *timer) { } 516static inline void debug_timer_deactivate(struct timer_list *timer) { }
513#endif 517#endif
514 518
515static void __init_timer(struct timer_list *timer) 519static void __init_timer(struct timer_list *timer,
520 const char *name,
521 struct lock_class_key *key)
516{ 522{
517 timer->entry.next = NULL; 523 timer->entry.next = NULL;
518 timer->base = __raw_get_cpu_var(tvec_bases); 524 timer->base = __raw_get_cpu_var(tvec_bases);
@@ -521,6 +527,7 @@ static void __init_timer(struct timer_list *timer)
521 timer->start_pid = -1; 527 timer->start_pid = -1;
522 memset(timer->start_comm, 0, TASK_COMM_LEN); 528 memset(timer->start_comm, 0, TASK_COMM_LEN);
523#endif 529#endif
530 lockdep_init_map(&timer->lockdep_map, name, key, 0);
524} 531}
525 532
526/** 533/**
@@ -530,19 +537,23 @@ static void __init_timer(struct timer_list *timer)
530 * init_timer() must be done to a timer prior calling *any* of the 537 * init_timer() must be done to a timer prior calling *any* of the
531 * other timer functions. 538 * other timer functions.
532 */ 539 */
533void init_timer(struct timer_list *timer) 540void init_timer_key(struct timer_list *timer,
541 const char *name,
542 struct lock_class_key *key)
534{ 543{
535 debug_timer_init(timer); 544 debug_timer_init(timer);
536 __init_timer(timer); 545 __init_timer(timer, name, key);
537} 546}
538EXPORT_SYMBOL(init_timer); 547EXPORT_SYMBOL(init_timer_key);
539 548
540void init_timer_deferrable(struct timer_list *timer) 549void init_timer_deferrable_key(struct timer_list *timer,
550 const char *name,
551 struct lock_class_key *key)
541{ 552{
542 init_timer(timer); 553 init_timer_key(timer, name, key);
543 timer_set_deferrable(timer); 554 timer_set_deferrable(timer);
544} 555}
545EXPORT_SYMBOL(init_timer_deferrable); 556EXPORT_SYMBOL(init_timer_deferrable_key);
546 557
547static inline void detach_timer(struct timer_list *timer, 558static inline void detach_timer(struct timer_list *timer,
548 int clear_pending) 559 int clear_pending)
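
Callers are not expected to pass a name and key by hand. The include/linux/timer.h half of this merge (listed in the diffstat but not shown in this hunk) presumably keeps the old init_timer()/init_timer_deferrable() names as macros that supply a static lock_class_key per call site, the way other lockdep-aware *_init() wrappers do. A sketch of what such a wrapper looks like, under that assumption:

	/* Sketch only -- see include/linux/timer.h for the real definitions. */
	#ifdef CONFIG_LOCKDEP
	#define init_timer(timer)						\
		do {								\
			static struct lock_class_key __key;			\
			init_timer_key((timer), #timer, &__key);		\
		} while (0)
	#else
	#define init_timer(timer)						\
		init_timer_key((timer), NULL, NULL)
	#endif

With CONFIG_LOCKDEP disabled, lockdep_init_map() compiles away, so passing NULL for the name and key costs nothing.
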
@@ -789,6 +800,15 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
789 */ 800 */
790int del_timer_sync(struct timer_list *timer) 801int del_timer_sync(struct timer_list *timer)
791{ 802{
803#ifdef CONFIG_LOCKDEP
804 unsigned long flags;
805
806 local_irq_save(flags);
807 lock_map_acquire(&timer->lockdep_map);
808 lock_map_release(&timer->lockdep_map);
809 local_irq_restore(flags);
810#endif
811
792 for (;;) { 812 for (;;) {
793 int ret = try_to_del_timer_sync(timer); 813 int ret = try_to_del_timer_sync(timer);
794 if (ret >= 0) 814 if (ret >= 0)
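
The lock_map_acquire()/lock_map_release() pair above lets lockdep catch a classic timer deadlock without it ever having to trigger: calling del_timer_sync() while holding a lock that the timer callback also takes. A schematic fragment of that pattern, with hypothetical names (my_lock, my_timer, my_timer_fn); it is not a complete module, just the shape of the bug:

	#include <linux/jiffies.h>
	#include <linux/spinlock.h>
	#include <linux/timer.h>

	static DEFINE_SPINLOCK(my_lock);
	static struct timer_list my_timer;

	static void my_timer_fn(unsigned long data)
	{
		spin_lock(&my_lock);		/* the callback takes my_lock */
		/* ... */
		spin_unlock(&my_lock);
	}

	static void my_start(void)
	{
		setup_timer(&my_timer, my_timer_fn, 0);
		mod_timer(&my_timer, jiffies + HZ);
	}

	static void my_teardown(void)
	{
		spin_lock(&my_lock);
		/*
		 * Waiting for my_timer_fn() while holding my_lock can
		 * deadlock, because the callback wants my_lock too.  The
		 * annotation records my_lock -> timer->lockdep_map here and
		 * timer->lockdep_map -> my_lock in the timer-run path
		 * (__run_timers(), below), so lockdep reports the inversion
		 * even if the callback is not running at this moment.
		 */
		del_timer_sync(&my_timer);
		spin_unlock(&my_lock);
	}
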
@@ -861,10 +881,36 @@ static inline void __run_timers(struct tvec_base *base)
861 881
862 set_running_timer(base, timer); 882 set_running_timer(base, timer);
863 detach_timer(timer, 1); 883 detach_timer(timer, 1);
884
864 spin_unlock_irq(&base->lock); 885 spin_unlock_irq(&base->lock);
865 { 886 {
866 int preempt_count = preempt_count(); 887 int preempt_count = preempt_count();
888
889#ifdef CONFIG_LOCKDEP
890 /*
891 * It is permissible to free the timer from
892 * inside the function that is called from
893 * it, this we need to take into account for
894 * lockdep too. To avoid bogus "held lock
895 * freed" warnings as well as problems when
896 * looking into timer->lockdep_map, make a
897 * copy and use that here.
898 */
899 struct lockdep_map lockdep_map =
900 timer->lockdep_map;
901#endif
902 /*
903 * Couple the lock chain with the lock chain at
904 * del_timer_sync() by acquiring the lock_map
905 * around the fn() call here and in
906 * del_timer_sync().
907 */
908 lock_map_acquire(&lockdep_map);
909
867 fn(data); 910 fn(data);
911
912 lock_map_release(&lockdep_map);
913
868 if (preempt_count != preempt_count()) { 914 if (preempt_count != preempt_count()) {
869 printk(KERN_ERR "huh, entered %p " 915 printk(KERN_ERR "huh, entered %p "
870 "with preempt_count %08x, exited" 916 "with preempt_count %08x, exited"
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a8c275c01e8..f2a163db52f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -61,6 +61,8 @@ enum {
61 61
62static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; 62static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
63 63
64#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
65
64/** 66/**
65 * tracing_on - enable all tracing buffers 67 * tracing_on - enable all tracing buffers
66 * 68 *
@@ -132,7 +134,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
132} 134}
133EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 135EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
134 136
135#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) 137#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
136#define RB_ALIGNMENT 4U 138#define RB_ALIGNMENT 4U
137#define RB_MAX_SMALL_DATA 28 139#define RB_MAX_SMALL_DATA 28
138 140
@@ -234,6 +236,18 @@ static void rb_init_page(struct buffer_data_page *bpage)
234 local_set(&bpage->commit, 0); 236 local_set(&bpage->commit, 0);
235} 237}
236 238
239/**
240 * ring_buffer_page_len - the size of data on the page.
241 * @page: The page to read
242 *
243 * Returns the amount of data on the page, including buffer page header.
244 */
245size_t ring_buffer_page_len(void *page)
246{
247 return local_read(&((struct buffer_data_page *)page)->commit)
248 + BUF_PAGE_HDR_SIZE;
249}
250
237/* 251/*
238 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 252 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
239 * this issue out. 253 * this issue out.
@@ -254,7 +268,7 @@ static inline int test_time_stamp(u64 delta)
254 return 0; 268 return 0;
255} 269}
256 270
257#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) 271#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
258 272
259/* 273/*
260 * head_page == tail_page && head == tail then buffer is empty. 274 * head_page == tail_page && head == tail then buffer is empty.
@@ -2378,8 +2392,8 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2378 */ 2392 */
2379void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) 2393void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2380{ 2394{
2381 unsigned long addr;
2382 struct buffer_data_page *bpage; 2395 struct buffer_data_page *bpage;
2396 unsigned long addr;
2383 2397
2384 addr = __get_free_page(GFP_KERNEL); 2398 addr = __get_free_page(GFP_KERNEL);
2385 if (!addr) 2399 if (!addr)
@@ -2387,6 +2401,8 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2387 2401
2388 bpage = (void *)addr; 2402 bpage = (void *)addr;
2389 2403
2404 rb_init_page(bpage);
2405
2390 return bpage; 2406 return bpage;
2391} 2407}
2392 2408
@@ -2406,6 +2422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2406 * ring_buffer_read_page - extract a page from the ring buffer 2422 * ring_buffer_read_page - extract a page from the ring buffer
2407 * @buffer: buffer to extract from 2423 * @buffer: buffer to extract from
2408 * @data_page: the page to use allocated from ring_buffer_alloc_read_page 2424 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2425 * @len: amount to extract
2409 * @cpu: the cpu of the buffer to extract 2426 * @cpu: the cpu of the buffer to extract
2410 * @full: should the extraction only happen when the page is full. 2427 * @full: should the extraction only happen when the page is full.
2411 * 2428 *
@@ -2418,7 +2435,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2418 * rpage = ring_buffer_alloc_read_page(buffer); 2435 * rpage = ring_buffer_alloc_read_page(buffer);
2419 * if (!rpage) 2436 * if (!rpage)
2420 * return error; 2437 * return error;
2421 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); 2438 * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
2422 * if (ret >= 0) 2439 * if (ret >= 0)
2423 * process_page(rpage, ret); 2440 * process_page(rpage, ret);
2424 * 2441 *
@@ -2435,70 +2452,103 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2435 * <0 if no data has been transferred. 2452 * <0 if no data has been transferred.
2436 */ 2453 */
2437int ring_buffer_read_page(struct ring_buffer *buffer, 2454int ring_buffer_read_page(struct ring_buffer *buffer,
2438 void **data_page, int cpu, int full) 2455 void **data_page, size_t len, int cpu, int full)
2439{ 2456{
2440 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2457 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2441 struct ring_buffer_event *event; 2458 struct ring_buffer_event *event;
2442 struct buffer_data_page *bpage; 2459 struct buffer_data_page *bpage;
2460 struct buffer_page *reader;
2443 unsigned long flags; 2461 unsigned long flags;
2462 unsigned int commit;
2444 unsigned int read; 2463 unsigned int read;
2445 int ret = -1; 2464 int ret = -1;
2446 2465
2466 /*
2467 * If len is not big enough to hold the page header, then
2468 * we can not copy anything.
2469 */
2470 if (len <= BUF_PAGE_HDR_SIZE)
2471 return -1;
2472
2473 len -= BUF_PAGE_HDR_SIZE;
2474
2447 if (!data_page) 2475 if (!data_page)
2448 return 0; 2476 return -1;
2449 2477
2450 bpage = *data_page; 2478 bpage = *data_page;
2451 if (!bpage) 2479 if (!bpage)
2452 return 0; 2480 return -1;
2453 2481
2454 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2482 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2455 2483
2456 /* 2484 reader = rb_get_reader_page(cpu_buffer);
2457 * rb_buffer_peek will get the next ring buffer if 2485 if (!reader)
2458 * the current reader page is empty.
2459 */
2460 event = rb_buffer_peek(buffer, cpu, NULL);
2461 if (!event)
2462 goto out; 2486 goto out;
2463 2487
2464 /* check for data */ 2488 event = rb_reader_event(cpu_buffer);
2465 if (!local_read(&cpu_buffer->reader_page->page->commit)) 2489
2466 goto out; 2490 read = reader->read;
2491 commit = rb_page_commit(reader);
2467 2492
2468 read = cpu_buffer->reader_page->read;
2469 /* 2493 /*
2470 * If the writer is already off of the read page, then simply 2494 * If this page has been partially read or
2471 * switch the read page with the given page. Otherwise 2495 * if len is not big enough to read the rest of the page or
2472 * we need to copy the data from the reader to the writer. 2496 * a writer is still on the page, then
2497 * we must copy the data from the page to the buffer.
2498 * Otherwise, we can simply swap the page with the one passed in.
2473 */ 2499 */
2474 if (cpu_buffer->reader_page == cpu_buffer->commit_page) { 2500 if (read || (len < (commit - read)) ||
2475 unsigned int commit = rb_page_commit(cpu_buffer->reader_page); 2501 cpu_buffer->reader_page == cpu_buffer->commit_page) {
2476 struct buffer_data_page *rpage = cpu_buffer->reader_page->page; 2502 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
2503 unsigned int rpos = read;
2504 unsigned int pos = 0;
2505 unsigned int size;
2477 2506
2478 if (full) 2507 if (full)
2479 goto out; 2508 goto out;
2480 /* The writer is still on the reader page, we must copy */
2481 memcpy(bpage->data + read, rpage->data + read, commit - read);
2482 2509
2483 /* consume what was read */ 2510 if (len > (commit - read))
2484 cpu_buffer->reader_page->read = commit; 2511 len = (commit - read);
2512
2513 size = rb_event_length(event);
2514
2515 if (len < size)
2516 goto out;
2517
2518 /* Need to copy one event at a time */
2519 do {
2520 memcpy(bpage->data + pos, rpage->data + rpos, size);
2521
2522 len -= size;
2523
2524 rb_advance_reader(cpu_buffer);
2525 rpos = reader->read;
2526 pos += size;
2527
2528 event = rb_reader_event(cpu_buffer);
2529 size = rb_event_length(event);
2530 } while (len > size);
2485 2531
2486 /* update bpage */ 2532 /* update bpage */
2487 local_set(&bpage->commit, commit); 2533 local_set(&bpage->commit, pos);
2488 if (!read) 2534 bpage->time_stamp = rpage->time_stamp;
2489 bpage->time_stamp = rpage->time_stamp; 2535
2536 /* we copied everything to the beginning */
2537 read = 0;
2490 } else { 2538 } else {
2491 /* swap the pages */ 2539 /* swap the pages */
2492 rb_init_page(bpage); 2540 rb_init_page(bpage);
2493 bpage = cpu_buffer->reader_page->page; 2541 bpage = reader->page;
2494 cpu_buffer->reader_page->page = *data_page; 2542 reader->page = *data_page;
2495 cpu_buffer->reader_page->read = 0; 2543 local_set(&reader->write, 0);
2544 reader->read = 0;
2496 *data_page = bpage; 2545 *data_page = bpage;
2546
2547 /* update the entry counter */
2548 rb_remove_entries(cpu_buffer, bpage, read);
2497 } 2549 }
2498 ret = read; 2550 ret = read;
2499 2551
2500 /* update the entry counter */
2501 rb_remove_entries(cpu_buffer, bpage, read);
2502 out: 2552 out:
2503 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2553 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2504 2554
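
Putting the new calling convention together: a reader allocates a spare page, asks ring_buffer_read_page() for up to len bytes, and either receives a copy (partially read page, small len, or writer still on the page) or has its spare swapped with the reader page wholesale. A minimal kernel-style sketch following the example in the ring_buffer_read_page() comment; read_one_page() and process_page() are hypothetical names and error handling is trimmed:

	#include <linux/ring_buffer.h>

	/* Hypothetical consumer, as in the ring_buffer_read_page() comment. */
	static void process_page(void *page, int offset);

	static void read_one_page(struct ring_buffer *buffer, int cpu)
	{
		void *rpage = ring_buffer_alloc_read_page(buffer);
		int ret;

		if (!rpage)
			return;

		/* Ask for a full page; a smaller len forces the copy path. */
		ret = ring_buffer_read_page(buffer, &rpage, PAGE_SIZE, cpu, 0);
		if (ret >= 0) {
			/*
			 * ret is the read offset into the page;
			 * ring_buffer_page_len(rpage) is the amount of data,
			 * including the buffer page header.
			 */
			process_page(rpage, ret);
		}

		ring_buffer_free_read_page(buffer, rpage);
	}
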
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ea055aa21cd..c8abbb0c839 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -11,31 +11,30 @@
11 * Copyright (C) 2004-2006 Ingo Molnar 11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h>
14#include <linux/utsrelease.h> 15#include <linux/utsrelease.h>
16#include <linux/stacktrace.h>
17#include <linux/writeback.h>
15#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
16#include <linux/seq_file.h> 19#include <linux/seq_file.h>
17#include <linux/notifier.h> 20#include <linux/notifier.h>
21#include <linux/irqflags.h>
18#include <linux/debugfs.h> 22#include <linux/debugfs.h>
19#include <linux/pagemap.h> 23#include <linux/pagemap.h>
20#include <linux/hardirq.h> 24#include <linux/hardirq.h>
21#include <linux/linkage.h> 25#include <linux/linkage.h>
22#include <linux/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/kprobes.h>
23#include <linux/ftrace.h> 28#include <linux/ftrace.h>
24#include <linux/module.h> 29#include <linux/module.h>
25#include <linux/percpu.h> 30#include <linux/percpu.h>
31#include <linux/splice.h>
26#include <linux/kdebug.h> 32#include <linux/kdebug.h>
27#include <linux/ctype.h> 33#include <linux/ctype.h>
28#include <linux/init.h> 34#include <linux/init.h>
29#include <linux/poll.h> 35#include <linux/poll.h>
30#include <linux/gfp.h> 36#include <linux/gfp.h>
31#include <linux/fs.h> 37#include <linux/fs.h>
32#include <linux/kprobes.h>
33#include <linux/writeback.h>
34#include <linux/splice.h>
35
36#include <linux/stacktrace.h>
37#include <linux/ring_buffer.h>
38#include <linux/irqflags.h>
39 38
40#include "trace.h" 39#include "trace.h"
41#include "trace_output.h" 40#include "trace_output.h"
@@ -624,7 +623,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
624static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 623static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
625static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 624static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
626static int cmdline_idx; 625static int cmdline_idx;
627static DEFINE_SPINLOCK(trace_cmdline_lock); 626static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
628 627
629/* temporary disable recording */ 628/* temporary disable recording */
630static atomic_t trace_record_cmdline_disabled __read_mostly; 629static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -736,7 +735,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
736 * nor do we want to disable interrupts, 735 * nor do we want to disable interrupts,
737 * so if we miss here, then better luck next time. 736 * so if we miss here, then better luck next time.
738 */ 737 */
739 if (!spin_trylock(&trace_cmdline_lock)) 738 if (!__raw_spin_trylock(&trace_cmdline_lock))
740 return; 739 return;
741 740
742 idx = map_pid_to_cmdline[tsk->pid]; 741 idx = map_pid_to_cmdline[tsk->pid];
@@ -754,7 +753,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
754 753
755 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 754 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
756 755
757 spin_unlock(&trace_cmdline_lock); 756 __raw_spin_unlock(&trace_cmdline_lock);
758} 757}
759 758
760char *trace_find_cmdline(int pid) 759char *trace_find_cmdline(int pid)
@@ -3005,6 +3004,246 @@ static struct file_operations tracing_mark_fops = {
3005 .write = tracing_mark_write, 3004 .write = tracing_mark_write,
3006}; 3005};
3007 3006
3007struct ftrace_buffer_info {
3008 struct trace_array *tr;
3009 void *spare;
3010 int cpu;
3011 unsigned int read;
3012};
3013
3014static int tracing_buffers_open(struct inode *inode, struct file *filp)
3015{
3016 int cpu = (int)(long)inode->i_private;
3017 struct ftrace_buffer_info *info;
3018
3019 if (tracing_disabled)
3020 return -ENODEV;
3021
3022 info = kzalloc(sizeof(*info), GFP_KERNEL);
3023 if (!info)
3024 return -ENOMEM;
3025
3026 info->tr = &global_trace;
3027 info->cpu = cpu;
3028 info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
3029 /* Force reading ring buffer for first read */
3030 info->read = (unsigned int)-1;
3031 if (!info->spare)
3032 goto out;
3033
3034 filp->private_data = info;
3035
3036 return 0;
3037
3038 out:
3039 kfree(info);
3040 return -ENOMEM;
3041}
3042
3043static ssize_t
3044tracing_buffers_read(struct file *filp, char __user *ubuf,
3045 size_t count, loff_t *ppos)
3046{
3047 struct ftrace_buffer_info *info = filp->private_data;
3048 unsigned int pos;
3049 ssize_t ret;
3050 size_t size;
3051
3052 /* Do we have previous read data to read? */
3053 if (info->read < PAGE_SIZE)
3054 goto read;
3055
3056 info->read = 0;
3057
3058 ret = ring_buffer_read_page(info->tr->buffer,
3059 &info->spare,
3060 count,
3061 info->cpu, 0);
3062 if (ret < 0)
3063 return 0;
3064
3065 pos = ring_buffer_page_len(info->spare);
3066
3067 if (pos < PAGE_SIZE)
3068 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3069
3070read:
3071 size = PAGE_SIZE - info->read;
3072 if (size > count)
3073 size = count;
3074
3075 ret = copy_to_user(ubuf, info->spare + info->read, size);
3076 if (ret)
3077 return -EFAULT;
3078 *ppos += size;
3079 info->read += size;
3080
3081 return size;
3082}
3083
3084static int tracing_buffers_release(struct inode *inode, struct file *file)
3085{
3086 struct ftrace_buffer_info *info = file->private_data;
3087
3088 ring_buffer_free_read_page(info->tr->buffer, info->spare);
3089 kfree(info);
3090
3091 return 0;
3092}
3093
3094struct buffer_ref {
3095 struct ring_buffer *buffer;
3096 void *page;
3097 int ref;
3098};
3099
3100static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
3101 struct pipe_buffer *buf)
3102{
3103 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3104
3105 if (--ref->ref)
3106 return;
3107
3108 ring_buffer_free_read_page(ref->buffer, ref->page);
3109 kfree(ref);
3110 buf->private = 0;
3111}
3112
3113static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
3114 struct pipe_buffer *buf)
3115{
3116 return 1;
3117}
3118
3119static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3120 struct pipe_buffer *buf)
3121{
3122 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3123
3124 ref->ref++;
3125}
3126
3127/* Pipe buffer operations for a buffer. */
3128static struct pipe_buf_operations buffer_pipe_buf_ops = {
3129 .can_merge = 0,
3130 .map = generic_pipe_buf_map,
3131 .unmap = generic_pipe_buf_unmap,
3132 .confirm = generic_pipe_buf_confirm,
3133 .release = buffer_pipe_buf_release,
3134 .steal = buffer_pipe_buf_steal,
3135 .get = buffer_pipe_buf_get,
3136};
3137
3138/*
3139 * Callback from splice_to_pipe(), if we need to release some pages
3140 * at the end of the spd in case we error'ed out in filling the pipe.
3141 */
3142static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
3143{
3144 struct buffer_ref *ref =
3145 (struct buffer_ref *)spd->partial[i].private;
3146
3147 if (--ref->ref)
3148 return;
3149
3150 ring_buffer_free_read_page(ref->buffer, ref->page);
3151 kfree(ref);
3152 spd->partial[i].private = 0;
3153}
3154
3155static ssize_t
3156tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3157 struct pipe_inode_info *pipe, size_t len,
3158 unsigned int flags)
3159{
3160 struct ftrace_buffer_info *info = file->private_data;
3161 struct partial_page partial[PIPE_BUFFERS];
3162 struct page *pages[PIPE_BUFFERS];
3163 struct splice_pipe_desc spd = {
3164 .pages = pages,
3165 .partial = partial,
3166 .flags = flags,
3167 .ops = &buffer_pipe_buf_ops,
3168 .spd_release = buffer_spd_release,
3169 };
3170 struct buffer_ref *ref;
3171 int size, i;
3172 size_t ret;
3173
3174 /*
3175 * We can't seek on a buffer input
3176 */
3177 if (unlikely(*ppos))
3178 return -ESPIPE;
3179
3180
3181 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) {
3182 struct page *page;
3183 int r;
3184
3185 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
3186 if (!ref)
3187 break;
3188
3189 ref->buffer = info->tr->buffer;
3190 ref->page = ring_buffer_alloc_read_page(ref->buffer);
3191 if (!ref->page) {
3192 kfree(ref);
3193 break;
3194 }
3195
3196 r = ring_buffer_read_page(ref->buffer, &ref->page,
3197 len, info->cpu, 0);
3198 if (r < 0) {
3199 ring_buffer_free_read_page(ref->buffer,
3200 ref->page);
3201 kfree(ref);
3202 break;
3203 }
3204
3205 /*
 3206	 * zero out any leftover data; this is going to
3207 * user land.
3208 */
3209 size = ring_buffer_page_len(ref->page);
3210 if (size < PAGE_SIZE)
3211 memset(ref->page + size, 0, PAGE_SIZE - size);
3212
3213 page = virt_to_page(ref->page);
3214
3215 spd.pages[i] = page;
3216 spd.partial[i].len = PAGE_SIZE;
3217 spd.partial[i].offset = 0;
3218 spd.partial[i].private = (unsigned long)ref;
3219 spd.nr_pages++;
3220 }
3221
3222 spd.nr_pages = i;
3223
3224 /* did we read anything? */
3225 if (!spd.nr_pages) {
3226 if (flags & SPLICE_F_NONBLOCK)
3227 ret = -EAGAIN;
3228 else
3229 ret = 0;
3230 /* TODO: block */
3231 return ret;
3232 }
3233
3234 ret = splice_to_pipe(pipe, &spd);
3235
3236 return ret;
3237}
3238
3239static const struct file_operations tracing_buffers_fops = {
3240 .open = tracing_buffers_open,
3241 .read = tracing_buffers_read,
3242 .release = tracing_buffers_release,
3243 .splice_read = tracing_buffers_splice_read,
3244 .llseek = no_llseek,
3245};
3246
3008#ifdef CONFIG_DYNAMIC_FTRACE 3247#ifdef CONFIG_DYNAMIC_FTRACE
3009 3248
3010int __weak ftrace_arch_read_dyn_info(char *buf, int size) 3249int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3399,6 +3638,7 @@ static __init void create_trace_options_dir(void)
3399static __init int tracer_init_debugfs(void) 3638static __init int tracer_init_debugfs(void)
3400{ 3639{
3401 struct dentry *d_tracer; 3640 struct dentry *d_tracer;
3641 struct dentry *buffers;
3402 struct dentry *entry; 3642 struct dentry *entry;
3403 int cpu; 3643 int cpu;
3404 3644
@@ -3471,6 +3711,26 @@ static __init int tracer_init_debugfs(void)
3471 pr_warning("Could not create debugfs " 3711 pr_warning("Could not create debugfs "
3472 "'trace_marker' entry\n"); 3712 "'trace_marker' entry\n");
3473 3713
3714 buffers = debugfs_create_dir("binary_buffers", d_tracer);
3715
3716 if (!buffers)
3717 pr_warning("Could not create buffers directory\n");
3718 else {
3719 int cpu;
3720 char buf[64];
3721
3722 for_each_tracing_cpu(cpu) {
3723 sprintf(buf, "%d", cpu);
3724
3725 entry = debugfs_create_file(buf, 0444, buffers,
3726 (void *)(long)cpu,
3727 &tracing_buffers_fops);
3728 if (!entry)
3729 pr_warning("Could not create debugfs buffers "
3730 "'%s' entry\n", buf);
3731 }
3732 }
3733
3474#ifdef CONFIG_DYNAMIC_FTRACE 3734#ifdef CONFIG_DYNAMIC_FTRACE
3475 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 3735 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
3476 &ftrace_update_tot_cnt, 3736 &ftrace_update_tot_cnt,
@@ -3491,7 +3751,7 @@ static __init int tracer_init_debugfs(void)
3491 3751
3492int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) 3752int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3493{ 3753{
3494 static DEFINE_SPINLOCK(trace_buf_lock); 3754 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
3495 static char trace_buf[TRACE_BUF_SIZE]; 3755 static char trace_buf[TRACE_BUF_SIZE];
3496 3756
3497 struct ring_buffer_event *event; 3757 struct ring_buffer_event *event;
@@ -3513,7 +3773,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3513 goto out; 3773 goto out;
3514 3774
3515 pause_graph_tracing(); 3775 pause_graph_tracing();
3516 spin_lock_irqsave(&trace_buf_lock, irq_flags); 3776 raw_local_irq_save(irq_flags);
3777 __raw_spin_lock(&trace_buf_lock);
3517 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 3778 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
3518 3779
3519 len = min(len, TRACE_BUF_SIZE-1); 3780 len = min(len, TRACE_BUF_SIZE-1);
@@ -3532,7 +3793,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3532 ring_buffer_unlock_commit(tr->buffer, event); 3793 ring_buffer_unlock_commit(tr->buffer, event);
3533 3794
3534 out_unlock: 3795 out_unlock:
3535 spin_unlock_irqrestore(&trace_buf_lock, irq_flags); 3796 __raw_spin_unlock(&trace_buf_lock);
3797 raw_local_irq_restore(irq_flags);
3536 unpause_graph_tracing(); 3798 unpause_graph_tracing();
3537 out: 3799 out:
3538 preempt_enable_notrace(); 3800 preempt_enable_notrace();
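
From userspace, each per-cpu file under the new binary_buffers directory hands out raw ring-buffer pages via tracing_buffers_read() or, zero-copy, via splice(). A small sketch of the read path; the debugfs mount point (/sys/kernel/debug) and the CPU number are assumptions, so adjust the path for the running system:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/kernel/debug/tracing/binary_buffers/0";
		char page[4096];	/* one raw ring-buffer page */
		ssize_t n;
		int fd;

		fd = open(path, O_RDONLY);
		if (fd < 0) {
			perror(path);
			return 1;
		}

		/*
		 * tracing_buffers_read() fills its spare page through
		 * ring_buffer_read_page() and copies it out; shorter reads
		 * continue from the previous offset within that page.
		 */
		n = read(fd, page, sizeof(page));
		if (n < 0)
			perror("read");
		else
			printf("read %zd bytes of raw buffer data\n", n);

		close(fd);
		return 0;
	}
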
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e606633fb49..561bb5c5d98 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -217,6 +217,7 @@ enum trace_flag_type {
217 */ 217 */
218struct trace_array_cpu { 218struct trace_array_cpu {
219 atomic_t disabled; 219 atomic_t disabled;
220 void *buffer_page; /* ring buffer spare */
220 221
221 /* these fields get copied into max-trace: */ 222 /* these fields get copied into max-trace: */
222 unsigned long trace_idx; 223 unsigned long trace_idx;
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index 041789ffbac..2c8d76c7dbe 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -5,7 +5,7 @@
5 * 5 *
6 * static void ftrace_event_<call>(proto) 6 * static void ftrace_event_<call>(proto)
7 * { 7 * {
8 * event_trace_printk(_RET_IP_, "(<call>) " <fmt>); 8 * event_trace_printk(_RET_IP_, "<call>: " <fmt>);
9 * } 9 * }
10 * 10 *
11 * static int ftrace_reg_event_<call>(void) 11 * static int ftrace_reg_event_<call>(void)
@@ -112,7 +112,7 @@
112#define _TRACE_FORMAT(call, proto, args, fmt) \ 112#define _TRACE_FORMAT(call, proto, args, fmt) \
113static void ftrace_event_##call(proto) \ 113static void ftrace_event_##call(proto) \
114{ \ 114{ \
115 event_trace_printk(_RET_IP_, "(" #call ") " fmt); \ 115 event_trace_printk(_RET_IP_, #call ": " fmt); \
116} \ 116} \
117 \ 117 \
118static int ftrace_reg_event_##call(void) \ 118static int ftrace_reg_event_##call(void) \
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1bcf9cd4baa..a0879b2e8b6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -402,7 +402,7 @@ config LOCKDEP
402 bool 402 bool
403 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT 403 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
404 select STACKTRACE 404 select STACKTRACE
405 select FRAME_POINTER if !X86 && !MIPS && !PPC 405 select FRAME_POINTER if !MIPS && !PPC
406 select KALLSYMS 406 select KALLSYMS
407 select KALLSYMS_ALL 407 select KALLSYMS_ALL
408 408
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5c44ed49ca9..a3803ea8c27 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1479,6 +1479,8 @@ __alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
1479 unsigned long did_some_progress; 1479 unsigned long did_some_progress;
1480 unsigned long pages_reclaimed = 0; 1480 unsigned long pages_reclaimed = 0;
1481 1481
1482 lockdep_trace_alloc(gfp_mask);
1483
1482 might_sleep_if(wait); 1484 might_sleep_if(wait);
1483 1485
1484 if (should_fail_alloc_page(gfp_mask, order)) 1486 if (should_fail_alloc_page(gfp_mask, order))
@@ -1578,12 +1580,15 @@ nofail_alloc:
1578 */ 1580 */
1579 cpuset_update_task_memory_state(); 1581 cpuset_update_task_memory_state();
1580 p->flags |= PF_MEMALLOC; 1582 p->flags |= PF_MEMALLOC;
1583
1584 lockdep_set_current_reclaim_state(gfp_mask);
1581 reclaim_state.reclaimed_slab = 0; 1585 reclaim_state.reclaimed_slab = 0;
1582 p->reclaim_state = &reclaim_state; 1586 p->reclaim_state = &reclaim_state;
1583 1587
1584 did_some_progress = try_to_free_pages(zonelist, order, gfp_mask); 1588 did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
1585 1589
1586 p->reclaim_state = NULL; 1590 p->reclaim_state = NULL;
1591 lockdep_clear_current_reclaim_state();
1587 p->flags &= ~PF_MEMALLOC; 1592 p->flags &= ~PF_MEMALLOC;
1588 1593
1589 cond_resched(); 1594 cond_resched();
diff --git a/mm/slab.c b/mm/slab.c
index aeeb4ecb942..9ec66c3e6ee 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3327,6 +3327,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3327 unsigned long save_flags; 3327 unsigned long save_flags;
3328 void *ptr; 3328 void *ptr;
3329 3329
3330 lockdep_trace_alloc(flags);
3331
3330 if (slab_should_failslab(cachep, flags)) 3332 if (slab_should_failslab(cachep, flags))
3331 return NULL; 3333 return NULL;
3332 3334
@@ -3403,6 +3405,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3403 unsigned long save_flags; 3405 unsigned long save_flags;
3404 void *objp; 3406 void *objp;
3405 3407
3408 lockdep_trace_alloc(flags);
3409
3406 if (slab_should_failslab(cachep, flags)) 3410 if (slab_should_failslab(cachep, flags))
3407 return NULL; 3411 return NULL;
3408 3412
diff --git a/mm/slob.c b/mm/slob.c
index f9cc2468823..596152926a8 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -466,6 +466,8 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
466 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); 466 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
467 void *ret; 467 void *ret;
468 468
469 lockdep_trace_alloc(flags);
470
469 if (size < PAGE_SIZE - align) { 471 if (size < PAGE_SIZE - align) {
470 if (!size) 472 if (!size)
471 return ZERO_SIZE_PTR; 473 return ZERO_SIZE_PTR;
diff --git a/mm/slub.c b/mm/slub.c
index 6de5e07c885..816734ed8aa 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1597,6 +1597,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1597 unsigned long flags; 1597 unsigned long flags;
1598 unsigned int objsize; 1598 unsigned int objsize;
1599 1599
1600 lockdep_trace_alloc(gfpflags);
1600 might_sleep_if(gfpflags & __GFP_WAIT); 1601 might_sleep_if(gfpflags & __GFP_WAIT);
1601 1602
1602 if (should_failslab(s->objsize, gfpflags)) 1603 if (should_failslab(s->objsize, gfpflags))
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6177e3bcd66..ae6f4c174a1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1965,6 +1965,8 @@ static int kswapd(void *p)
1965 }; 1965 };
1966 node_to_cpumask_ptr(cpumask, pgdat->node_id); 1966 node_to_cpumask_ptr(cpumask, pgdat->node_id);
1967 1967
1968 lockdep_set_current_reclaim_state(GFP_KERNEL);
1969
1968 if (!cpumask_empty(cpumask)) 1970 if (!cpumask_empty(cpumask))
1969 set_cpus_allowed_ptr(tsk, cpumask); 1971 set_cpus_allowed_ptr(tsk, cpumask);
1970 current->reclaim_state = &reclaim_state; 1972 current->reclaim_state = &reclaim_state;
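
The mm/ hunks above are the other half of the new reclaim_fs lockdep state: lockdep_trace_alloc() marks every allocation that may enter reclaim, and lockdep_set_current_reclaim_state() marks code running as reclaim. Together they let lockdep flag locks that are held across a GFP_KERNEL allocation and are also taken from the reclaim path. A schematic fragment of the inversion this catches, with hypothetical names (my_fs_lock, my_writepage, my_create) rather than a real filesystem:

	#include <linux/mutex.h>
	#include <linux/slab.h>

	static DEFINE_MUTEX(my_fs_lock);

	/* Runs in the reclaim path, e.g. a ->writepage called under kswapd. */
	static int my_writepage(void)
	{
		mutex_lock(&my_fs_lock);	/* taken while reclaiming */
		/* ... write the page back ... */
		mutex_unlock(&my_fs_lock);
		return 0;
	}

	/* Ordinary syscall path. */
	static int my_create(void)
	{
		void *p;

		mutex_lock(&my_fs_lock);
		/*
		 * GFP_KERNEL may enter reclaim, reclaim may call
		 * my_writepage(), and my_writepage() wants my_fs_lock again.
		 * The lockdep_trace_alloc()/lockdep_set_current_reclaim_state()
		 * annotations let lockdep report this inversion without the
		 * deadlock ever having to happen.
		 */
		p = kmalloc(128, GFP_KERNEL);
		kfree(p);

		mutex_unlock(&my_fs_lock);
		return 0;
	}
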