author     Linus Torvalds <torvalds@linux-foundation.org>  2013-11-14 02:30:30 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-11-14 02:30:30 -0500
commit     5e30025a319910695f5010dc0fb53a23299da14d (patch)
tree       4292bcf78de221c7de1774ccf5ad0ac5a9315c26 /kernel/locking
parent     7971e23a66c94f1b9bd2d64a3e86dfbfa8c60121 (diff)
parent     90d3839b90fe379557dae4a44735a6af78f42885 (diff)
Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core locking changes from Ingo Molnar:
 "The biggest changes:

   - add lockdep support for seqcount/seqlocks structures, this
     unearthed both bugs and required extra annotation.

   - move the various kernel locking primitives to the new
     kernel/locking/ directory"

* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  block: Use u64_stats_init() to initialize seqcounts
  locking/lockdep: Mark __lockdep_count_forward_deps() as static
  lockdep/proc: Fix lock-time avg computation
  locking/doc: Update references to kernel/mutex.c
  ipv6: Fix possible ipv6 seqlock deadlock
  cpuset: Fix potential deadlock w/ set_mems_allowed
  seqcount: Add lockdep functionality to seqcount/seqlock structures
  net: Explicitly initialize u64_stats_sync structures for lockdep
  locking: Move the percpu-rwsem code to kernel/locking/
  locking: Move the lglocks code to kernel/locking/
  locking: Move the rwsem code to kernel/locking/
  locking: Move the rtmutex code to kernel/locking/
  locking: Move the semaphore core to kernel/locking/
  locking: Move the spinlock code to kernel/locking/
  locking: Move the lockdep code to kernel/locking/
  locking: Move the mutex code to kernel/locking/
  hung_task debugging: Add tracepoint to report the hang
  x86/locking/kconfig: Update paravirt spinlock Kconfig description
  lockstat: Report avg wait and hold times
  lockdep, x86/alternatives: Drop ancient lockdep fixup message
  ...
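The seqcount lockdep work in this series is why several of the commits above switch to explicit u64_stats_sync initialization. A minimal sketch of the resulting pattern, assuming the post-series u64_stats helpers (the demo_stats structure and functions below are invented for illustration, not taken from this merge):

    #include <linux/u64_stats_sync.h>

    struct demo_stats {
            u64 packets;
            struct u64_stats_sync syncp;    /* seqcount-backed on 32-bit SMP */
    };

    static void demo_stats_setup(struct demo_stats *s)
    {
            /* Explicit init gives lockdep a class key for the embedded seqcount. */
            u64_stats_init(&s->syncp);
    }

    static void demo_stats_count_packet(struct demo_stats *s)
    {
            u64_stats_update_begin(&s->syncp);
            s->packets++;
            u64_stats_update_end(&s->syncp);
    }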
Diffstat (limited to 'kernel/locking')
-rw-r--r--   kernel/locking/Makefile                25
-rw-r--r--   kernel/locking/lglock.c                89
-rw-r--r--   kernel/locking/lockdep.c             4257
-rw-r--r--   kernel/locking/lockdep_internals.h    170
-rw-r--r--   kernel/locking/lockdep_proc.c         683
-rw-r--r--   kernel/locking/lockdep_states.h         9
-rw-r--r--   kernel/locking/mutex-debug.c          110
-rw-r--r--   kernel/locking/mutex-debug.h           55
-rw-r--r--   kernel/locking/mutex.c                960
-rw-r--r--   kernel/locking/mutex.h                 48
-rw-r--r--   kernel/locking/percpu-rwsem.c         165
-rw-r--r--   kernel/locking/rtmutex-debug.c        187
-rw-r--r--   kernel/locking/rtmutex-debug.h         33
-rw-r--r--   kernel/locking/rtmutex-tester.c       420
-rw-r--r--   kernel/locking/rtmutex.c             1060
-rw-r--r--   kernel/locking/rtmutex.h               26
-rw-r--r--   kernel/locking/rtmutex_common.h       126
-rw-r--r--   kernel/locking/rwsem-spinlock.c       296
-rw-r--r--   kernel/locking/rwsem-xadd.c           293
-rw-r--r--   kernel/locking/rwsem.c                157
-rw-r--r--   kernel/locking/semaphore.c            263
-rw-r--r--   kernel/locking/spinlock.c             399
-rw-r--r--   kernel/locking/spinlock_debug.c       302
23 files changed, 10133 insertions, 0 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
new file mode 100644
index 000000000000..baab8e5e7f66
--- /dev/null
+++ b/kernel/locking/Makefile
@@ -0,0 +1,25 @@
1
2obj-y += mutex.o semaphore.o rwsem.o lglock.o
3
4ifdef CONFIG_FUNCTION_TRACER
5CFLAGS_REMOVE_lockdep.o = -pg
6CFLAGS_REMOVE_lockdep_proc.o = -pg
7CFLAGS_REMOVE_mutex-debug.o = -pg
8CFLAGS_REMOVE_rtmutex-debug.o = -pg
9endif
10
11obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
12obj-$(CONFIG_LOCKDEP) += lockdep.o
13ifeq ($(CONFIG_PROC_FS),y)
14obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
15endif
16obj-$(CONFIG_SMP) += spinlock.o
17obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
18obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
19obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
20obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
21obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
22obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
23obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
24obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
25obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
new file mode 100644
index 000000000000..86ae2aebf004
--- /dev/null
+++ b/kernel/locking/lglock.c
@@ -0,0 +1,89 @@
1/* See include/linux/lglock.h for description */
2#include <linux/module.h>
3#include <linux/lglock.h>
4#include <linux/cpu.h>
5#include <linux/string.h>
6
7/*
8 * Note there is no uninit, so lglocks cannot be defined in
9 * modules (but it's fine to use them from there)
10 * Could be added though, just undo lg_lock_init
11 */
12
13void lg_lock_init(struct lglock *lg, char *name)
14{
15 LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
16}
17EXPORT_SYMBOL(lg_lock_init);
18
19void lg_local_lock(struct lglock *lg)
20{
21 arch_spinlock_t *lock;
22
23 preempt_disable();
24 lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
25 lock = this_cpu_ptr(lg->lock);
26 arch_spin_lock(lock);
27}
28EXPORT_SYMBOL(lg_local_lock);
29
30void lg_local_unlock(struct lglock *lg)
31{
32 arch_spinlock_t *lock;
33
34 lock_release(&lg->lock_dep_map, 1, _RET_IP_);
35 lock = this_cpu_ptr(lg->lock);
36 arch_spin_unlock(lock);
37 preempt_enable();
38}
39EXPORT_SYMBOL(lg_local_unlock);
40
41void lg_local_lock_cpu(struct lglock *lg, int cpu)
42{
43 arch_spinlock_t *lock;
44
45 preempt_disable();
46 lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
47 lock = per_cpu_ptr(lg->lock, cpu);
48 arch_spin_lock(lock);
49}
50EXPORT_SYMBOL(lg_local_lock_cpu);
51
52void lg_local_unlock_cpu(struct lglock *lg, int cpu)
53{
54 arch_spinlock_t *lock;
55
56 lock_release(&lg->lock_dep_map, 1, _RET_IP_);
57 lock = per_cpu_ptr(lg->lock, cpu);
58 arch_spin_unlock(lock);
59 preempt_enable();
60}
61EXPORT_SYMBOL(lg_local_unlock_cpu);
62
63void lg_global_lock(struct lglock *lg)
64{
65 int i;
66
67 preempt_disable();
68 lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
69 for_each_possible_cpu(i) {
70 arch_spinlock_t *lock;
71 lock = per_cpu_ptr(lg->lock, i);
72 arch_spin_lock(lock);
73 }
74}
75EXPORT_SYMBOL(lg_global_lock);
76
77void lg_global_unlock(struct lglock *lg)
78{
79 int i;
80
81 lock_release(&lg->lock_dep_map, 1, _RET_IP_);
82 for_each_possible_cpu(i) {
83 arch_spinlock_t *lock;
84 lock = per_cpu_ptr(lg->lock, i);
85 arch_spin_unlock(lock);
86 }
87 preempt_enable();
88}
89EXPORT_SYMBOL(lg_global_unlock);
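A rough usage sketch for the lglock API above, assuming DEFINE_LGLOCK from include/linux/lglock.h (the demo_* names are hypothetical):

    #include <linux/lglock.h>

    DEFINE_LGLOCK(demo_lglock);

    static void demo_init(void)
    {
            lg_lock_init(&demo_lglock, "demo_lglock");
    }

    static void demo_local_update(void)
    {
            /* Fast path: only this CPU's spinlock is taken. */
            lg_local_lock(&demo_lglock);
            /* ... update this CPU's share of the data ... */
            lg_local_unlock(&demo_lglock);
    }

    static void demo_global_walk(void)
    {
            /* Slow path: every possible CPU's spinlock is taken, in order. */
            lg_global_lock(&demo_lglock);
            /* ... walk all CPUs' data ... */
            lg_global_unlock(&demo_lglock);
    }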
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
new file mode 100644
index 000000000000..576ba756a32d
--- /dev/null
+++ b/kernel/locking/lockdep.c
@@ -0,0 +1,4257 @@
1/*
2 * kernel/lockdep.c
3 *
4 * Runtime locking correctness validator
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
10 *
11 * this code maps all the lock dependencies as they occur in a live kernel
12 * and will warn about the following classes of locking bugs:
13 *
14 * - lock inversion scenarios
15 * - circular lock dependencies
16 * - hardirq/softirq safe/unsafe locking bugs
17 *
18 * Bugs are reported even if the current locking scenario does not cause
19 * any deadlock at this point.
20 *
21 * I.e. if anytime in the past two locks were taken in a different order,
22 * even if it happened for another task, even if those were different
23 * locks (but of the same class as this lock), this code will detect it.
24 *
25 * Thanks to Arjan van de Ven for coming up with the initial idea of
26 * mapping lock dependencies runtime.
27 */
28#define DISABLE_BRANCH_PROFILING
29#include <linux/mutex.h>
30#include <linux/sched.h>
31#include <linux/delay.h>
32#include <linux/module.h>
33#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
35#include <linux/spinlock.h>
36#include <linux/kallsyms.h>
37#include <linux/interrupt.h>
38#include <linux/stacktrace.h>
39#include <linux/debug_locks.h>
40#include <linux/irqflags.h>
41#include <linux/utsname.h>
42#include <linux/hash.h>
43#include <linux/ftrace.h>
44#include <linux/stringify.h>
45#include <linux/bitops.h>
46#include <linux/gfp.h>
47#include <linux/kmemcheck.h>
48
49#include <asm/sections.h>
50
51#include "lockdep_internals.h"
52
53#define CREATE_TRACE_POINTS
54#include <trace/events/lock.h>
55
56#ifdef CONFIG_PROVE_LOCKING
57int prove_locking = 1;
58module_param(prove_locking, int, 0644);
59#else
60#define prove_locking 0
61#endif
62
63#ifdef CONFIG_LOCK_STAT
64int lock_stat = 1;
65module_param(lock_stat, int, 0644);
66#else
67#define lock_stat 0
68#endif
69
70/*
71 * lockdep_lock: protects the lockdep graph, the hashes and the
72 * class/list/hash allocators.
73 *
74 * This is one of the rare exceptions where it's justified
75 * to use a raw spinlock - we really dont want the spinlock
76 * code to recurse back into the lockdep code...
77 */
78static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
79
80static int graph_lock(void)
81{
82 arch_spin_lock(&lockdep_lock);
83 /*
84 * Make sure that if another CPU detected a bug while
85 * walking the graph we dont change it (while the other
86 * CPU is busy printing out stuff with the graph lock
87 * dropped already)
88 */
89 if (!debug_locks) {
90 arch_spin_unlock(&lockdep_lock);
91 return 0;
92 }
93 /* prevent any recursions within lockdep from causing deadlocks */
94 current->lockdep_recursion++;
95 return 1;
96}
97
98static inline int graph_unlock(void)
99{
100 if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) {
101 /*
102 * The lockdep graph lock isn't locked while we expect it to
103 * be, we're confused now, bye!
104 */
105 return DEBUG_LOCKS_WARN_ON(1);
106 }
107
108 current->lockdep_recursion--;
109 arch_spin_unlock(&lockdep_lock);
110 return 0;
111}
112
113/*
114 * Turn lock debugging off and return with 0 if it was off already,
115 * and also release the graph lock:
116 */
117static inline int debug_locks_off_graph_unlock(void)
118{
119 int ret = debug_locks_off();
120
121 arch_spin_unlock(&lockdep_lock);
122
123 return ret;
124}
125
126static int lockdep_initialized;
127
128unsigned long nr_list_entries;
129static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
130
131/*
132 * All data structures here are protected by the global debug_lock.
133 *
134 * Mutex key structs only get allocated, once during bootup, and never
135 * get freed - this significantly simplifies the debugging code.
136 */
137unsigned long nr_lock_classes;
138static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
139
140static inline struct lock_class *hlock_class(struct held_lock *hlock)
141{
142 if (!hlock->class_idx) {
143 /*
144 * Someone passed in garbage, we give up.
145 */
146 DEBUG_LOCKS_WARN_ON(1);
147 return NULL;
148 }
149 return lock_classes + hlock->class_idx - 1;
150}
151
152#ifdef CONFIG_LOCK_STAT
153static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
154 cpu_lock_stats);
155
156static inline u64 lockstat_clock(void)
157{
158 return local_clock();
159}
160
161static int lock_point(unsigned long points[], unsigned long ip)
162{
163 int i;
164
165 for (i = 0; i < LOCKSTAT_POINTS; i++) {
166 if (points[i] == 0) {
167 points[i] = ip;
168 break;
169 }
170 if (points[i] == ip)
171 break;
172 }
173
174 return i;
175}
176
177static void lock_time_inc(struct lock_time *lt, u64 time)
178{
179 if (time > lt->max)
180 lt->max = time;
181
182 if (time < lt->min || !lt->nr)
183 lt->min = time;
184
185 lt->total += time;
186 lt->nr++;
187}
188
189static inline void lock_time_add(struct lock_time *src, struct lock_time *dst)
190{
191 if (!src->nr)
192 return;
193
194 if (src->max > dst->max)
195 dst->max = src->max;
196
197 if (src->min < dst->min || !dst->nr)
198 dst->min = src->min;
199
200 dst->total += src->total;
201 dst->nr += src->nr;
202}
203
204struct lock_class_stats lock_stats(struct lock_class *class)
205{
206 struct lock_class_stats stats;
207 int cpu, i;
208
209 memset(&stats, 0, sizeof(struct lock_class_stats));
210 for_each_possible_cpu(cpu) {
211 struct lock_class_stats *pcs =
212 &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
213
214 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
215 stats.contention_point[i] += pcs->contention_point[i];
216
217 for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
218 stats.contending_point[i] += pcs->contending_point[i];
219
220 lock_time_add(&pcs->read_waittime, &stats.read_waittime);
221 lock_time_add(&pcs->write_waittime, &stats.write_waittime);
222
223 lock_time_add(&pcs->read_holdtime, &stats.read_holdtime);
224 lock_time_add(&pcs->write_holdtime, &stats.write_holdtime);
225
226 for (i = 0; i < ARRAY_SIZE(stats.bounces); i++)
227 stats.bounces[i] += pcs->bounces[i];
228 }
229
230 return stats;
231}
232
233void clear_lock_stats(struct lock_class *class)
234{
235 int cpu;
236
237 for_each_possible_cpu(cpu) {
238 struct lock_class_stats *cpu_stats =
239 &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
240
241 memset(cpu_stats, 0, sizeof(struct lock_class_stats));
242 }
243 memset(class->contention_point, 0, sizeof(class->contention_point));
244 memset(class->contending_point, 0, sizeof(class->contending_point));
245}
246
247static struct lock_class_stats *get_lock_stats(struct lock_class *class)
248{
249 return &get_cpu_var(cpu_lock_stats)[class - lock_classes];
250}
251
252static void put_lock_stats(struct lock_class_stats *stats)
253{
254 put_cpu_var(cpu_lock_stats);
255}
256
257static void lock_release_holdtime(struct held_lock *hlock)
258{
259 struct lock_class_stats *stats;
260 u64 holdtime;
261
262 if (!lock_stat)
263 return;
264
265 holdtime = lockstat_clock() - hlock->holdtime_stamp;
266
267 stats = get_lock_stats(hlock_class(hlock));
268 if (hlock->read)
269 lock_time_inc(&stats->read_holdtime, holdtime);
270 else
271 lock_time_inc(&stats->write_holdtime, holdtime);
272 put_lock_stats(stats);
273}
274#else
275static inline void lock_release_holdtime(struct held_lock *hlock)
276{
277}
278#endif
279
280/*
281 * We keep a global list of all lock classes. The list only grows,
282 * never shrinks. The list is only accessed with the lockdep
283 * spinlock lock held.
284 */
285LIST_HEAD(all_lock_classes);
286
287/*
288 * The lockdep classes are in a hash-table as well, for fast lookup:
289 */
290#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1)
291#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS)
292#define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS)
293#define classhashentry(key) (classhash_table + __classhashfn((key)))
294
295static struct list_head classhash_table[CLASSHASH_SIZE];
296
297/*
298 * We put the lock dependency chains into a hash-table as well, to cache
299 * their existence:
300 */
301#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1)
302#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS)
303#define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS)
304#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain)))
305
306static struct list_head chainhash_table[CHAINHASH_SIZE];
307
308/*
309 * The hash key of the lock dependency chains is a hash itself too:
310 * it's a hash of all locks taken up to that lock, including that lock.
311 * It's a 64-bit hash, because it's important for the keys to be
312 * unique.
313 */
314#define iterate_chain_key(key1, key2) \
315 (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \
316 ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \
317 (key2))
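/*
 * Illustrative walk-through (editorial sketch): as locks are acquired,
 * each held lock's class index is folded into the running key, roughly
 *
 *	chain_key = 0;					(empty stack)
 *	chain_key = iterate_chain_key(chain_key, A);	(A held)
 *	chain_key = iterate_chain_key(chain_key, B);	(A, B held)
 *
 * so the same classes taken in the same order always produce the same
 * 64-bit key, which chainhashentry() then uses to look up or cache the
 * lock chain.
 */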
318
319void lockdep_off(void)
320{
321 current->lockdep_recursion++;
322}
323EXPORT_SYMBOL(lockdep_off);
324
325void lockdep_on(void)
326{
327 current->lockdep_recursion--;
328}
329EXPORT_SYMBOL(lockdep_on);
330
331/*
332 * Debugging switches:
333 */
334
335#define VERBOSE 0
336#define VERY_VERBOSE 0
337
338#if VERBOSE
339# define HARDIRQ_VERBOSE 1
340# define SOFTIRQ_VERBOSE 1
341# define RECLAIM_VERBOSE 1
342#else
343# define HARDIRQ_VERBOSE 0
344# define SOFTIRQ_VERBOSE 0
345# define RECLAIM_VERBOSE 0
346#endif
347
348#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE
349/*
350 * Quick filtering for interesting events:
351 */
352static int class_filter(struct lock_class *class)
353{
354#if 0
355 /* Example */
356 if (class->name_version == 1 &&
357 !strcmp(class->name, "lockname"))
358 return 1;
359 if (class->name_version == 1 &&
360 !strcmp(class->name, "&struct->lockfield"))
361 return 1;
362#endif
363 /* Filter everything else. 1 would be to allow everything else */
364 return 0;
365}
366#endif
367
368static int verbose(struct lock_class *class)
369{
370#if VERBOSE
371 return class_filter(class);
372#endif
373 return 0;
374}
375
376/*
377 * Stack-trace: tightly packed array of stack backtrace
378 * addresses. Protected by the graph_lock.
379 */
380unsigned long nr_stack_trace_entries;
381static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
382
383static void print_lockdep_off(const char *bug_msg)
384{
385 printk(KERN_DEBUG "%s\n", bug_msg);
386 printk(KERN_DEBUG "turning off the locking correctness validator.\n");
387 printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n");
388}
389
390static int save_trace(struct stack_trace *trace)
391{
392 trace->nr_entries = 0;
393 trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
394 trace->entries = stack_trace + nr_stack_trace_entries;
395
396 trace->skip = 3;
397
398 save_stack_trace(trace);
399
400 /*
401 * Some daft arches put -1 at the end to indicate its a full trace.
402 *
403 * <rant> this is buggy anyway, since it takes a whole extra entry so a
404 * complete trace that maxes out the entries provided will be reported
405 * as incomplete, friggin useless </rant>
406 */
407 if (trace->nr_entries != 0 &&
408 trace->entries[trace->nr_entries-1] == ULONG_MAX)
409 trace->nr_entries--;
410
411 trace->max_entries = trace->nr_entries;
412
413 nr_stack_trace_entries += trace->nr_entries;
414
415 if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
416 if (!debug_locks_off_graph_unlock())
417 return 0;
418
419 print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
420 dump_stack();
421
422 return 0;
423 }
424
425 return 1;
426}
427
428unsigned int nr_hardirq_chains;
429unsigned int nr_softirq_chains;
430unsigned int nr_process_chains;
431unsigned int max_lockdep_depth;
432
433#ifdef CONFIG_DEBUG_LOCKDEP
434/*
435 * We cannot printk in early bootup code. Not even early_printk()
436 * might work. So we mark any initialization errors and printk
437 * about it later on, in lockdep_info().
438 */
439static int lockdep_init_error;
440static const char *lock_init_error;
441static unsigned long lockdep_init_trace_data[20];
442static struct stack_trace lockdep_init_trace = {
443 .max_entries = ARRAY_SIZE(lockdep_init_trace_data),
444 .entries = lockdep_init_trace_data,
445};
446
447/*
448 * Various lockdep statistics:
449 */
450DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
451#endif
452
453/*
454 * Locking printouts:
455 */
456
457#define __USAGE(__STATE) \
458 [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W", \
459 [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \
460 [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\
461 [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R",
462
463static const char *usage_str[] =
464{
465#define LOCKDEP_STATE(__STATE) __USAGE(__STATE)
466#include "lockdep_states.h"
467#undef LOCKDEP_STATE
468 [LOCK_USED] = "INITIAL USE",
469};
470
471const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
472{
473 return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str);
474}
475
476static inline unsigned long lock_flag(enum lock_usage_bit bit)
477{
478 return 1UL << bit;
479}
480
481static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
482{
483 char c = '.';
484
485 if (class->usage_mask & lock_flag(bit + 2))
486 c = '+';
487 if (class->usage_mask & lock_flag(bit)) {
488 c = '-';
489 if (class->usage_mask & lock_flag(bit + 2))
490 c = '?';
491 }
492
493 return c;
494}
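/*
 * Sketch of the usual legend for the per-state characters produced here:
 *   '.'  never acquired in this irq context, never with this irq enabled
 *   '-'  acquired in this irq context (USED_IN)
 *   '+'  acquired with this irq type enabled (ENABLED)
 *   '?'  both of the above, i.e. an irq-inversion candidate
 */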
495
496void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
497{
498 int i = 0;
499
500#define LOCKDEP_STATE(__STATE) \
501 usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \
502 usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ);
503#include "lockdep_states.h"
504#undef LOCKDEP_STATE
505
506 usage[i] = '\0';
507}
508
509static void __print_lock_name(struct lock_class *class)
510{
511 char str[KSYM_NAME_LEN];
512 const char *name;
513
514 name = class->name;
515 if (!name) {
516 name = __get_key_name(class->key, str);
517 printk("%s", name);
518 } else {
519 printk("%s", name);
520 if (class->name_version > 1)
521 printk("#%d", class->name_version);
522 if (class->subclass)
523 printk("/%d", class->subclass);
524 }
525}
526
527static void print_lock_name(struct lock_class *class)
528{
529 char usage[LOCK_USAGE_CHARS];
530
531 get_usage_chars(class, usage);
532
533 printk(" (");
534 __print_lock_name(class);
535 printk("){%s}", usage);
536}
537
538static void print_lockdep_cache(struct lockdep_map *lock)
539{
540 const char *name;
541 char str[KSYM_NAME_LEN];
542
543 name = lock->name;
544 if (!name)
545 name = __get_key_name(lock->key->subkeys, str);
546
547 printk("%s", name);
548}
549
550static void print_lock(struct held_lock *hlock)
551{
552 print_lock_name(hlock_class(hlock));
553 printk(", at: ");
554 print_ip_sym(hlock->acquire_ip);
555}
556
557static void lockdep_print_held_locks(struct task_struct *curr)
558{
559 int i, depth = curr->lockdep_depth;
560
561 if (!depth) {
562 printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
563 return;
564 }
565 printk("%d lock%s held by %s/%d:\n",
566 depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
567
568 for (i = 0; i < depth; i++) {
569 printk(" #%d: ", i);
570 print_lock(curr->held_locks + i);
571 }
572}
573
574static void print_kernel_ident(void)
575{
576 printk("%s %.*s %s\n", init_utsname()->release,
577 (int)strcspn(init_utsname()->version, " "),
578 init_utsname()->version,
579 print_tainted());
580}
581
582static int very_verbose(struct lock_class *class)
583{
584#if VERY_VERBOSE
585 return class_filter(class);
586#endif
587 return 0;
588}
589
590/*
591 * Is this the address of a static object:
592 */
593static int static_obj(void *obj)
594{
595 unsigned long start = (unsigned long) &_stext,
596 end = (unsigned long) &_end,
597 addr = (unsigned long) obj;
598
599 /*
600 * static variable?
601 */
602 if ((addr >= start) && (addr < end))
603 return 1;
604
605 if (arch_is_kernel_data(addr))
606 return 1;
607
608 /*
609 * in-kernel percpu var?
610 */
611 if (is_kernel_percpu_address(addr))
612 return 1;
613
614 /*
615 * module static or percpu var?
616 */
617 return is_module_address(addr) || is_module_percpu_address(addr);
618}
619
620/*
621 * To make lock name printouts unique, we calculate a unique
622 * class->name_version generation counter:
623 */
624static int count_matching_names(struct lock_class *new_class)
625{
626 struct lock_class *class;
627 int count = 0;
628
629 if (!new_class->name)
630 return 0;
631
632 list_for_each_entry(class, &all_lock_classes, lock_entry) {
633 if (new_class->key - new_class->subclass == class->key)
634 return class->name_version;
635 if (class->name && !strcmp(class->name, new_class->name))
636 count = max(count, class->name_version);
637 }
638
639 return count + 1;
640}
641
642/*
643 * Register a lock's class in the hash-table, if the class is not present
644 * yet. Otherwise we look it up. We cache the result in the lock object
645 * itself, so actual lookup of the hash should be once per lock object.
646 */
647static inline struct lock_class *
648look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
649{
650 struct lockdep_subclass_key *key;
651 struct list_head *hash_head;
652 struct lock_class *class;
653
654#ifdef CONFIG_DEBUG_LOCKDEP
655 /*
656 * If the architecture calls into lockdep before initializing
657 * the hashes then we'll warn about it later. (we cannot printk
658 * right now)
659 */
660 if (unlikely(!lockdep_initialized)) {
661 lockdep_init();
662 lockdep_init_error = 1;
663 lock_init_error = lock->name;
664 save_stack_trace(&lockdep_init_trace);
665 }
666#endif
667
668 if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
669 debug_locks_off();
670 printk(KERN_ERR
671 "BUG: looking up invalid subclass: %u\n", subclass);
672 printk(KERN_ERR
673 "turning off the locking correctness validator.\n");
674 dump_stack();
675 return NULL;
676 }
677
678 /*
679 * Static locks do not have their class-keys yet - for them the key
680 * is the lock object itself:
681 */
682 if (unlikely(!lock->key))
683 lock->key = (void *)lock;
684
685 /*
686 * NOTE: the class-key must be unique. For dynamic locks, a static
687 * lock_class_key variable is passed in through the mutex_init()
688 * (or spin_lock_init()) call - which acts as the key. For static
689 * locks we use the lock object itself as the key.
690 */
691 BUILD_BUG_ON(sizeof(struct lock_class_key) >
692 sizeof(struct lockdep_map));
693
694 key = lock->key->subkeys + subclass;
695
696 hash_head = classhashentry(key);
697
698 /*
699 * We can walk the hash lockfree, because the hash only
700 * grows, and we are careful when adding entries to the end:
701 */
702 list_for_each_entry(class, hash_head, hash_entry) {
703 if (class->key == key) {
704 /*
705 * Huh! same key, different name? Did someone trample
706 * on some memory? We're most confused.
707 */
708 WARN_ON_ONCE(class->name != lock->name);
709 return class;
710 }
711 }
712
713 return NULL;
714}
715
716/*
717 * Register a lock's class in the hash-table, if the class is not present
718 * yet. Otherwise we look it up. We cache the result in the lock object
719 * itself, so actual lookup of the hash should be once per lock object.
720 */
721static inline struct lock_class *
722register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
723{
724 struct lockdep_subclass_key *key;
725 struct list_head *hash_head;
726 struct lock_class *class;
727 unsigned long flags;
728
729 class = look_up_lock_class(lock, subclass);
730 if (likely(class))
731 goto out_set_class_cache;
732
733 /*
734 * Debug-check: all keys must be persistent!
735 */
736 if (!static_obj(lock->key)) {
737 debug_locks_off();
738 printk("INFO: trying to register non-static key.\n");
739 printk("the code is fine but needs lockdep annotation.\n");
740 printk("turning off the locking correctness validator.\n");
741 dump_stack();
742
743 return NULL;
744 }
745
746 key = lock->key->subkeys + subclass;
747 hash_head = classhashentry(key);
748
749 raw_local_irq_save(flags);
750 if (!graph_lock()) {
751 raw_local_irq_restore(flags);
752 return NULL;
753 }
754 /*
755 * We have to do the hash-walk again, to avoid races
756 * with another CPU:
757 */
758 list_for_each_entry(class, hash_head, hash_entry)
759 if (class->key == key)
760 goto out_unlock_set;
761 /*
762 * Allocate a new key from the static array, and add it to
763 * the hash:
764 */
765 if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
766 if (!debug_locks_off_graph_unlock()) {
767 raw_local_irq_restore(flags);
768 return NULL;
769 }
770 raw_local_irq_restore(flags);
771
772 print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
773 dump_stack();
774 return NULL;
775 }
776 class = lock_classes + nr_lock_classes++;
777 debug_atomic_inc(nr_unused_locks);
778 class->key = key;
779 class->name = lock->name;
780 class->subclass = subclass;
781 INIT_LIST_HEAD(&class->lock_entry);
782 INIT_LIST_HEAD(&class->locks_before);
783 INIT_LIST_HEAD(&class->locks_after);
784 class->name_version = count_matching_names(class);
785 /*
786 * We use RCU's safe list-add method to make
787 * parallel walking of the hash-list safe:
788 */
789 list_add_tail_rcu(&class->hash_entry, hash_head);
790 /*
791 * Add it to the global list of classes:
792 */
793 list_add_tail_rcu(&class->lock_entry, &all_lock_classes);
794
795 if (verbose(class)) {
796 graph_unlock();
797 raw_local_irq_restore(flags);
798
799 printk("\nnew class %p: %s", class->key, class->name);
800 if (class->name_version > 1)
801 printk("#%d", class->name_version);
802 printk("\n");
803 dump_stack();
804
805 raw_local_irq_save(flags);
806 if (!graph_lock()) {
807 raw_local_irq_restore(flags);
808 return NULL;
809 }
810 }
811out_unlock_set:
812 graph_unlock();
813 raw_local_irq_restore(flags);
814
815out_set_class_cache:
816 if (!subclass || force)
817 lock->class_cache[0] = class;
818 else if (subclass < NR_LOCKDEP_CACHING_CLASSES)
819 lock->class_cache[subclass] = class;
820
821 /*
822 * Hash collision, did we smoke some? We found a class with a matching
823 * hash but the subclass -- which is hashed in -- didn't match.
824 */
825 if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass))
826 return NULL;
827
828 return class;
829}
830
831#ifdef CONFIG_PROVE_LOCKING
832/*
833 * Allocate a lockdep entry. (assumes the graph_lock held, returns
834 * with NULL on failure)
835 */
836static struct lock_list *alloc_list_entry(void)
837{
838 if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
839 if (!debug_locks_off_graph_unlock())
840 return NULL;
841
842 print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!");
843 dump_stack();
844 return NULL;
845 }
846 return list_entries + nr_list_entries++;
847}
848
849/*
850 * Add a new dependency to the head of the list:
851 */
852static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
853 struct list_head *head, unsigned long ip,
854 int distance, struct stack_trace *trace)
855{
856 struct lock_list *entry;
857 /*
858 * Lock not present yet - get a new dependency struct and
859 * add it to the list:
860 */
861 entry = alloc_list_entry();
862 if (!entry)
863 return 0;
864
865 entry->class = this;
866 entry->distance = distance;
867 entry->trace = *trace;
868 /*
869 * Since we never remove from the dependency list, the list can
870 * be walked lockless by other CPUs, it's only allocation
871 * that must be protected by the spinlock. But this also means
872 * we must make new entries visible only once writes to the
873 * entry become visible - hence the RCU op:
874 */
875 list_add_tail_rcu(&entry->entry, head);
876
877 return 1;
878}
879
880/*
881 * For efficient modulo arithmetic (the '& CQ_MASK' wrap-around), we use a power of 2
882 */
883#define MAX_CIRCULAR_QUEUE_SIZE 4096UL
884#define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1)
885
886/*
887 * The circular_queue and its helpers are used to implement the
888 * breadth-first search (BFS) algorithm, by which we can build
889 * the shortest path from the next lock to be acquired to a
890 * previously held lock if there is a circular dependency between them.
891 */
892struct circular_queue {
893 unsigned long element[MAX_CIRCULAR_QUEUE_SIZE];
894 unsigned int front, rear;
895};
896
897static struct circular_queue lock_cq;
898
899unsigned int max_bfs_queue_depth;
900
901static unsigned int lockdep_dependency_gen_id;
902
903static inline void __cq_init(struct circular_queue *cq)
904{
905 cq->front = cq->rear = 0;
906 lockdep_dependency_gen_id++;
907}
908
909static inline int __cq_empty(struct circular_queue *cq)
910{
911 return (cq->front == cq->rear);
912}
913
914static inline int __cq_full(struct circular_queue *cq)
915{
916 return ((cq->rear + 1) & CQ_MASK) == cq->front;
917}
918
919static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem)
920{
921 if (__cq_full(cq))
922 return -1;
923
924 cq->element[cq->rear] = elem;
925 cq->rear = (cq->rear + 1) & CQ_MASK;
926 return 0;
927}
928
929static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem)
930{
931 if (__cq_empty(cq))
932 return -1;
933
934 *elem = cq->element[cq->front];
935 cq->front = (cq->front + 1) & CQ_MASK;
936 return 0;
937}
938
939static inline unsigned int __cq_get_elem_count(struct circular_queue *cq)
940{
941 return (cq->rear - cq->front) & CQ_MASK;
942}
943
944static inline void mark_lock_accessed(struct lock_list *lock,
945 struct lock_list *parent)
946{
947 unsigned long nr;
948
949 nr = lock - list_entries;
950 WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */
951 lock->parent = parent;
952 lock->class->dep_gen_id = lockdep_dependency_gen_id;
953}
954
955static inline unsigned long lock_accessed(struct lock_list *lock)
956{
957 unsigned long nr;
958
959 nr = lock - list_entries;
960 WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */
961 return lock->class->dep_gen_id == lockdep_dependency_gen_id;
962}
963
964static inline struct lock_list *get_lock_parent(struct lock_list *child)
965{
966 return child->parent;
967}
968
969static inline int get_lock_depth(struct lock_list *child)
970{
971 int depth = 0;
972 struct lock_list *parent;
973
974 while ((parent = get_lock_parent(child))) {
975 child = parent;
976 depth++;
977 }
978 return depth;
979}
980
981static int __bfs(struct lock_list *source_entry,
982 void *data,
983 int (*match)(struct lock_list *entry, void *data),
984 struct lock_list **target_entry,
985 int forward)
986{
987 struct lock_list *entry;
988 struct list_head *head;
989 struct circular_queue *cq = &lock_cq;
990 int ret = 1;
991
992 if (match(source_entry, data)) {
993 *target_entry = source_entry;
994 ret = 0;
995 goto exit;
996 }
997
998 if (forward)
999 head = &source_entry->class->locks_after;
1000 else
1001 head = &source_entry->class->locks_before;
1002
1003 if (list_empty(head))
1004 goto exit;
1005
1006 __cq_init(cq);
1007 __cq_enqueue(cq, (unsigned long)source_entry);
1008
1009 while (!__cq_empty(cq)) {
1010 struct lock_list *lock;
1011
1012 __cq_dequeue(cq, (unsigned long *)&lock);
1013
1014 if (!lock->class) {
1015 ret = -2;
1016 goto exit;
1017 }
1018
1019 if (forward)
1020 head = &lock->class->locks_after;
1021 else
1022 head = &lock->class->locks_before;
1023
1024 list_for_each_entry(entry, head, entry) {
1025 if (!lock_accessed(entry)) {
1026 unsigned int cq_depth;
1027 mark_lock_accessed(entry, lock);
1028 if (match(entry, data)) {
1029 *target_entry = entry;
1030 ret = 0;
1031 goto exit;
1032 }
1033
1034 if (__cq_enqueue(cq, (unsigned long)entry)) {
1035 ret = -1;
1036 goto exit;
1037 }
1038 cq_depth = __cq_get_elem_count(cq);
1039 if (max_bfs_queue_depth < cq_depth)
1040 max_bfs_queue_depth = cq_depth;
1041 }
1042 }
1043 }
1044exit:
1045 return ret;
1046}
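/*
 * Return convention of __bfs(), summarized for the callers below:
 *    1  search completed, no entry matched
 *    0  a match was found and stored in *target_entry
 *   -1  the BFS queue overflowed (MAX_CIRCULAR_QUEUE_SIZE too small)
 *   -2  a corrupted lock_list entry (NULL class) was encountered
 */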
1047
1048static inline int __bfs_forwards(struct lock_list *src_entry,
1049 void *data,
1050 int (*match)(struct lock_list *entry, void *data),
1051 struct lock_list **target_entry)
1052{
1053 return __bfs(src_entry, data, match, target_entry, 1);
1054
1055}
1056
1057static inline int __bfs_backwards(struct lock_list *src_entry,
1058 void *data,
1059 int (*match)(struct lock_list *entry, void *data),
1060 struct lock_list **target_entry)
1061{
1062 return __bfs(src_entry, data, match, target_entry, 0);
1063
1064}
1065
1066/*
1067 * Recursive, forwards-direction lock-dependency checking, used for
1068 * both noncyclic checking and for hardirq-unsafe/softirq-unsafe
1069 * checking.
1070 */
1071
1072/*
1073 * Print a dependency chain entry (this is only done when a deadlock
1074 * has been detected):
1075 */
1076static noinline int
1077print_circular_bug_entry(struct lock_list *target, int depth)
1078{
1079 if (debug_locks_silent)
1080 return 0;
1081 printk("\n-> #%u", depth);
1082 print_lock_name(target->class);
1083 printk(":\n");
1084 print_stack_trace(&target->trace, 6);
1085
1086 return 0;
1087}
1088
1089static void
1090print_circular_lock_scenario(struct held_lock *src,
1091 struct held_lock *tgt,
1092 struct lock_list *prt)
1093{
1094 struct lock_class *source = hlock_class(src);
1095 struct lock_class *target = hlock_class(tgt);
1096 struct lock_class *parent = prt->class;
1097
1098 /*
1099 * A direct locking problem where unsafe_class lock is taken
1100 * directly by safe_class lock, then all we need to show
1101 * is the deadlock scenario, as it is obvious that the
1102 * unsafe lock is taken under the safe lock.
1103 *
1104 * But if there is a chain instead, where the safe lock takes
1105 * an intermediate lock (middle_class) where this lock is
1106 * not the same as the safe lock, then the lock chain is
1107 * used to describe the problem. Otherwise we would need
1108 * to show a different CPU case for each link in the chain
1109 * from the safe_class lock to the unsafe_class lock.
1110 */
1111 if (parent != source) {
1112 printk("Chain exists of:\n ");
1113 __print_lock_name(source);
1114 printk(" --> ");
1115 __print_lock_name(parent);
1116 printk(" --> ");
1117 __print_lock_name(target);
1118 printk("\n\n");
1119 }
1120
1121 printk(" Possible unsafe locking scenario:\n\n");
1122 printk(" CPU0 CPU1\n");
1123 printk(" ---- ----\n");
1124 printk(" lock(");
1125 __print_lock_name(target);
1126 printk(");\n");
1127 printk(" lock(");
1128 __print_lock_name(parent);
1129 printk(");\n");
1130 printk(" lock(");
1131 __print_lock_name(target);
1132 printk(");\n");
1133 printk(" lock(");
1134 __print_lock_name(source);
1135 printk(");\n");
1136 printk("\n *** DEADLOCK ***\n\n");
1137}
1138
1139/*
1140 * When a circular dependency is detected, print the
1141 * header first:
1142 */
1143static noinline int
1144print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1145 struct held_lock *check_src,
1146 struct held_lock *check_tgt)
1147{
1148 struct task_struct *curr = current;
1149
1150 if (debug_locks_silent)
1151 return 0;
1152
1153 printk("\n");
1154 printk("======================================================\n");
1155 printk("[ INFO: possible circular locking dependency detected ]\n");
1156 print_kernel_ident();
1157 printk("-------------------------------------------------------\n");
1158 printk("%s/%d is trying to acquire lock:\n",
1159 curr->comm, task_pid_nr(curr));
1160 print_lock(check_src);
1161 printk("\nbut task is already holding lock:\n");
1162 print_lock(check_tgt);
1163 printk("\nwhich lock already depends on the new lock.\n\n");
1164 printk("\nthe existing dependency chain (in reverse order) is:\n");
1165
1166 print_circular_bug_entry(entry, depth);
1167
1168 return 0;
1169}
1170
1171static inline int class_equal(struct lock_list *entry, void *data)
1172{
1173 return entry->class == data;
1174}
1175
1176static noinline int print_circular_bug(struct lock_list *this,
1177 struct lock_list *target,
1178 struct held_lock *check_src,
1179 struct held_lock *check_tgt)
1180{
1181 struct task_struct *curr = current;
1182 struct lock_list *parent;
1183 struct lock_list *first_parent;
1184 int depth;
1185
1186 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1187 return 0;
1188
1189 if (!save_trace(&this->trace))
1190 return 0;
1191
1192 depth = get_lock_depth(target);
1193
1194 print_circular_bug_header(target, depth, check_src, check_tgt);
1195
1196 parent = get_lock_parent(target);
1197 first_parent = parent;
1198
1199 while (parent) {
1200 print_circular_bug_entry(parent, --depth);
1201 parent = get_lock_parent(parent);
1202 }
1203
1204 printk("\nother info that might help us debug this:\n\n");
1205 print_circular_lock_scenario(check_src, check_tgt,
1206 first_parent);
1207
1208 lockdep_print_held_locks(curr);
1209
1210 printk("\nstack backtrace:\n");
1211 dump_stack();
1212
1213 return 0;
1214}
1215
1216static noinline int print_bfs_bug(int ret)
1217{
1218 if (!debug_locks_off_graph_unlock())
1219 return 0;
1220
1221 /*
1222 * Breadth-first-search failed, graph got corrupted?
1223 */
1224 WARN(1, "lockdep bfs error:%d\n", ret);
1225
1226 return 0;
1227}
1228
1229static int noop_count(struct lock_list *entry, void *data)
1230{
1231 (*(unsigned long *)data)++;
1232 return 0;
1233}
1234
1235static unsigned long __lockdep_count_forward_deps(struct lock_list *this)
1236{
1237 unsigned long count = 0;
1238 struct lock_list *uninitialized_var(target_entry);
1239
1240 __bfs_forwards(this, (void *)&count, noop_count, &target_entry);
1241
1242 return count;
1243}
1244unsigned long lockdep_count_forward_deps(struct lock_class *class)
1245{
1246 unsigned long ret, flags;
1247 struct lock_list this;
1248
1249 this.parent = NULL;
1250 this.class = class;
1251
1252 local_irq_save(flags);
1253 arch_spin_lock(&lockdep_lock);
1254 ret = __lockdep_count_forward_deps(&this);
1255 arch_spin_unlock(&lockdep_lock);
1256 local_irq_restore(flags);
1257
1258 return ret;
1259}
1260
1261static unsigned long __lockdep_count_backward_deps(struct lock_list *this)
1262{
1263 unsigned long count = 0;
1264 struct lock_list *uninitialized_var(target_entry);
1265
1266 __bfs_backwards(this, (void *)&count, noop_count, &target_entry);
1267
1268 return count;
1269}
1270
1271unsigned long lockdep_count_backward_deps(struct lock_class *class)
1272{
1273 unsigned long ret, flags;
1274 struct lock_list this;
1275
1276 this.parent = NULL;
1277 this.class = class;
1278
1279 local_irq_save(flags);
1280 arch_spin_lock(&lockdep_lock);
1281 ret = __lockdep_count_backward_deps(&this);
1282 arch_spin_unlock(&lockdep_lock);
1283 local_irq_restore(flags);
1284
1285 return ret;
1286}
1287
1288/*
1289 * Prove that the dependency graph starting at <entry> can not
1290 * lead to <target>. Print an error and return 0 if it does.
1291 */
1292static noinline int
1293check_noncircular(struct lock_list *root, struct lock_class *target,
1294 struct lock_list **target_entry)
1295{
1296 int result;
1297
1298 debug_atomic_inc(nr_cyclic_checks);
1299
1300 result = __bfs_forwards(root, target, class_equal, target_entry);
1301
1302 return result;
1303}
1304
1305#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
1306/*
1307 * Forwards and backwards subgraph searching, for the purposes of
1308 * proving that two subgraphs can be connected by a new dependency
1309 * without creating any illegal irq-safe -> irq-unsafe lock dependency.
1310 */
1311
1312static inline int usage_match(struct lock_list *entry, void *bit)
1313{
1314 return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit);
1315}
1316
1317
1318
1319/*
1320 * Find a node in the forwards-direction dependency sub-graph starting
1321 * at @root->class that matches @bit.
1322 *
1323 * Return 0 if such a node exists in the subgraph, and put that node
1324 * into *@target_entry.
1325 *
1326 * Return 1 otherwise and keep *@target_entry unchanged.
1327 * Return <0 on error.
1328 */
1329static int
1330find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
1331 struct lock_list **target_entry)
1332{
1333 int result;
1334
1335 debug_atomic_inc(nr_find_usage_forwards_checks);
1336
1337 result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
1338
1339 return result;
1340}
1341
1342/*
1343 * Find a node in the backwards-direction dependency sub-graph starting
1344 * at @root->class that matches @bit.
1345 *
1346 * Return 0 if such a node exists in the subgraph, and put that node
1347 * into *@target_entry.
1348 *
1349 * Return 1 otherwise and keep *@target_entry unchanged.
1350 * Return <0 on error.
1351 */
1352static int
1353find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
1354 struct lock_list **target_entry)
1355{
1356 int result;
1357
1358 debug_atomic_inc(nr_find_usage_backwards_checks);
1359
1360 result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
1361
1362 return result;
1363}
1364
1365static void print_lock_class_header(struct lock_class *class, int depth)
1366{
1367 int bit;
1368
1369 printk("%*s->", depth, "");
1370 print_lock_name(class);
1371 printk(" ops: %lu", class->ops);
1372 printk(" {\n");
1373
1374 for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
1375 if (class->usage_mask & (1 << bit)) {
1376 int len = depth;
1377
1378 len += printk("%*s %s", depth, "", usage_str[bit]);
1379 len += printk(" at:\n");
1380 print_stack_trace(class->usage_traces + bit, len);
1381 }
1382 }
1383 printk("%*s }\n", depth, "");
1384
1385 printk("%*s ... key at: ",depth,"");
1386 print_ip_sym((unsigned long)class->key);
1387}
1388
1389/*
1390 * printk the shortest lock dependencies from @start to @end in reverse order:
1391 */
1392static void __used
1393print_shortest_lock_dependencies(struct lock_list *leaf,
1394 struct lock_list *root)
1395{
1396 struct lock_list *entry = leaf;
1397 int depth;
1398
1399 /* compute the depth from the tree generated by BFS */
1400 depth = get_lock_depth(leaf);
1401
1402 do {
1403 print_lock_class_header(entry->class, depth);
1404 printk("%*s ... acquired at:\n", depth, "");
1405 print_stack_trace(&entry->trace, 2);
1406 printk("\n");
1407
1408 if (depth == 0 && (entry != root)) {
1409 printk("lockdep:%s bad path found in chain graph\n", __func__);
1410 break;
1411 }
1412
1413 entry = get_lock_parent(entry);
1414 depth--;
1415 } while (entry && (depth >= 0));
1416
1417 return;
1418}
1419
1420static void
1421print_irq_lock_scenario(struct lock_list *safe_entry,
1422 struct lock_list *unsafe_entry,
1423 struct lock_class *prev_class,
1424 struct lock_class *next_class)
1425{
1426 struct lock_class *safe_class = safe_entry->class;
1427 struct lock_class *unsafe_class = unsafe_entry->class;
1428 struct lock_class *middle_class = prev_class;
1429
1430 if (middle_class == safe_class)
1431 middle_class = next_class;
1432
1433 /*
1434 * A direct locking problem where unsafe_class lock is taken
1435 * directly by safe_class lock, then all we need to show
1436 * is the deadlock scenario, as it is obvious that the
1437 * unsafe lock is taken under the safe lock.
1438 *
1439 * But if there is a chain instead, where the safe lock takes
1440 * an intermediate lock (middle_class) where this lock is
1441 * not the same as the safe lock, then the lock chain is
1442 * used to describe the problem. Otherwise we would need
1443 * to show a different CPU case for each link in the chain
1444 * from the safe_class lock to the unsafe_class lock.
1445 */
1446 if (middle_class != unsafe_class) {
1447 printk("Chain exists of:\n ");
1448 __print_lock_name(safe_class);
1449 printk(" --> ");
1450 __print_lock_name(middle_class);
1451 printk(" --> ");
1452 __print_lock_name(unsafe_class);
1453 printk("\n\n");
1454 }
1455
1456 printk(" Possible interrupt unsafe locking scenario:\n\n");
1457 printk(" CPU0 CPU1\n");
1458 printk(" ---- ----\n");
1459 printk(" lock(");
1460 __print_lock_name(unsafe_class);
1461 printk(");\n");
1462 printk(" local_irq_disable();\n");
1463 printk(" lock(");
1464 __print_lock_name(safe_class);
1465 printk(");\n");
1466 printk(" lock(");
1467 __print_lock_name(middle_class);
1468 printk(");\n");
1469 printk(" <Interrupt>\n");
1470 printk(" lock(");
1471 __print_lock_name(safe_class);
1472 printk(");\n");
1473 printk("\n *** DEADLOCK ***\n\n");
1474}
1475
1476static int
1477print_bad_irq_dependency(struct task_struct *curr,
1478 struct lock_list *prev_root,
1479 struct lock_list *next_root,
1480 struct lock_list *backwards_entry,
1481 struct lock_list *forwards_entry,
1482 struct held_lock *prev,
1483 struct held_lock *next,
1484 enum lock_usage_bit bit1,
1485 enum lock_usage_bit bit2,
1486 const char *irqclass)
1487{
1488 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1489 return 0;
1490
1491 printk("\n");
1492 printk("======================================================\n");
1493 printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
1494 irqclass, irqclass);
1495 print_kernel_ident();
1496 printk("------------------------------------------------------\n");
1497 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
1498 curr->comm, task_pid_nr(curr),
1499 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
1500 curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
1501 curr->hardirqs_enabled,
1502 curr->softirqs_enabled);
1503 print_lock(next);
1504
1505 printk("\nand this task is already holding:\n");
1506 print_lock(prev);
1507 printk("which would create a new lock dependency:\n");
1508 print_lock_name(hlock_class(prev));
1509 printk(" ->");
1510 print_lock_name(hlock_class(next));
1511 printk("\n");
1512
1513 printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
1514 irqclass);
1515 print_lock_name(backwards_entry->class);
1516 printk("\n... which became %s-irq-safe at:\n", irqclass);
1517
1518 print_stack_trace(backwards_entry->class->usage_traces + bit1, 1);
1519
1520 printk("\nto a %s-irq-unsafe lock:\n", irqclass);
1521 print_lock_name(forwards_entry->class);
1522 printk("\n... which became %s-irq-unsafe at:\n", irqclass);
1523 printk("...");
1524
1525 print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
1526
1527 printk("\nother info that might help us debug this:\n\n");
1528 print_irq_lock_scenario(backwards_entry, forwards_entry,
1529 hlock_class(prev), hlock_class(next));
1530
1531 lockdep_print_held_locks(curr);
1532
1533 printk("\nthe dependencies between %s-irq-safe lock", irqclass);
1534 printk(" and the holding lock:\n");
1535 if (!save_trace(&prev_root->trace))
1536 return 0;
1537 print_shortest_lock_dependencies(backwards_entry, prev_root);
1538
1539 printk("\nthe dependencies between the lock to be acquired");
1540 printk(" and %s-irq-unsafe lock:\n", irqclass);
1541 if (!save_trace(&next_root->trace))
1542 return 0;
1543 print_shortest_lock_dependencies(forwards_entry, next_root);
1544
1545 printk("\nstack backtrace:\n");
1546 dump_stack();
1547
1548 return 0;
1549}
1550
1551static int
1552check_usage(struct task_struct *curr, struct held_lock *prev,
1553 struct held_lock *next, enum lock_usage_bit bit_backwards,
1554 enum lock_usage_bit bit_forwards, const char *irqclass)
1555{
1556 int ret;
1557 struct lock_list this, that;
1558 struct lock_list *uninitialized_var(target_entry);
1559 struct lock_list *uninitialized_var(target_entry1);
1560
1561 this.parent = NULL;
1562
1563 this.class = hlock_class(prev);
1564 ret = find_usage_backwards(&this, bit_backwards, &target_entry);
1565 if (ret < 0)
1566 return print_bfs_bug(ret);
1567 if (ret == 1)
1568 return ret;
1569
1570 that.parent = NULL;
1571 that.class = hlock_class(next);
1572 ret = find_usage_forwards(&that, bit_forwards, &target_entry1);
1573 if (ret < 0)
1574 return print_bfs_bug(ret);
1575 if (ret == 1)
1576 return ret;
1577
1578 return print_bad_irq_dependency(curr, &this, &that,
1579 target_entry, target_entry1,
1580 prev, next,
1581 bit_backwards, bit_forwards, irqclass);
1582}
1583
1584static const char *state_names[] = {
1585#define LOCKDEP_STATE(__STATE) \
1586 __stringify(__STATE),
1587#include "lockdep_states.h"
1588#undef LOCKDEP_STATE
1589};
1590
1591static const char *state_rnames[] = {
1592#define LOCKDEP_STATE(__STATE) \
1593 __stringify(__STATE)"-READ",
1594#include "lockdep_states.h"
1595#undef LOCKDEP_STATE
1596};
1597
1598static inline const char *state_name(enum lock_usage_bit bit)
1599{
1600 return (bit & 1) ? state_rnames[bit >> 2] : state_names[bit >> 2];
1601}
1602
1603static int exclusive_bit(int new_bit)
1604{
1605 /*
1606 * USED_IN
1607 * USED_IN_READ
1608 * ENABLED
1609 * ENABLED_READ
1610 *
1611 * bit 0 - write/read
1612 * bit 1 - used_in/enabled
1613 * bit 2+ state
1614 */
1615
1616 int state = new_bit & ~3;
1617 int dir = new_bit & 2;
1618
1619 /*
1620 * keep state, bit flip the direction and strip read.
1621 */
1622 return state | (dir ^ 2);
1623}
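/*
 * Worked example for the bit layout above: for the HARDIRQ state,
 * LOCK_USED_IN_HARDIRQ and LOCK_ENABLED_HARDIRQ differ only in bit 1,
 * so exclusive_bit(LOCK_USED_IN_HARDIRQ) == LOCK_ENABLED_HARDIRQ and
 * exclusive_bit(LOCK_ENABLED_HARDIRQ) == LOCK_USED_IN_HARDIRQ; the
 * _READ variants map to the same (write) exclusive bit because bit 0
 * is stripped.
 */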
1624
1625static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
1626 struct held_lock *next, enum lock_usage_bit bit)
1627{
1628 /*
1629 * Prove that the new dependency does not connect a hardirq-safe
1630 * lock with a hardirq-unsafe lock - to achieve this we search
1631 * the backwards-subgraph starting at <prev>, and the
1632 * forwards-subgraph starting at <next>:
1633 */
1634 if (!check_usage(curr, prev, next, bit,
1635 exclusive_bit(bit), state_name(bit)))
1636 return 0;
1637
1638 bit++; /* _READ */
1639
1640 /*
1641 * Prove that the new dependency does not connect a hardirq-safe-read
1642 * lock with a hardirq-unsafe lock - to achieve this we search
1643 * the backwards-subgraph starting at <prev>, and the
1644 * forwards-subgraph starting at <next>:
1645 */
1646 if (!check_usage(curr, prev, next, bit,
1647 exclusive_bit(bit), state_name(bit)))
1648 return 0;
1649
1650 return 1;
1651}
1652
1653static int
1654check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
1655 struct held_lock *next)
1656{
1657#define LOCKDEP_STATE(__STATE) \
1658 if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \
1659 return 0;
1660#include "lockdep_states.h"
1661#undef LOCKDEP_STATE
1662
1663 return 1;
1664}
1665
1666static void inc_chains(void)
1667{
1668 if (current->hardirq_context)
1669 nr_hardirq_chains++;
1670 else {
1671 if (current->softirq_context)
1672 nr_softirq_chains++;
1673 else
1674 nr_process_chains++;
1675 }
1676}
1677
1678#else
1679
1680static inline int
1681check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
1682 struct held_lock *next)
1683{
1684 return 1;
1685}
1686
1687static inline void inc_chains(void)
1688{
1689 nr_process_chains++;
1690}
1691
1692#endif
1693
1694static void
1695print_deadlock_scenario(struct held_lock *nxt,
1696 struct held_lock *prv)
1697{
1698 struct lock_class *next = hlock_class(nxt);
1699 struct lock_class *prev = hlock_class(prv);
1700
1701 printk(" Possible unsafe locking scenario:\n\n");
1702 printk(" CPU0\n");
1703 printk(" ----\n");
1704 printk(" lock(");
1705 __print_lock_name(prev);
1706 printk(");\n");
1707 printk(" lock(");
1708 __print_lock_name(next);
1709 printk(");\n");
1710 printk("\n *** DEADLOCK ***\n\n");
1711 printk(" May be due to missing lock nesting notation\n\n");
1712}
1713
1714static int
1715print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
1716 struct held_lock *next)
1717{
1718 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1719 return 0;
1720
1721 printk("\n");
1722 printk("=============================================\n");
1723 printk("[ INFO: possible recursive locking detected ]\n");
1724 print_kernel_ident();
1725 printk("---------------------------------------------\n");
1726 printk("%s/%d is trying to acquire lock:\n",
1727 curr->comm, task_pid_nr(curr));
1728 print_lock(next);
1729 printk("\nbut task is already holding lock:\n");
1730 print_lock(prev);
1731
1732 printk("\nother info that might help us debug this:\n");
1733 print_deadlock_scenario(next, prev);
1734 lockdep_print_held_locks(curr);
1735
1736 printk("\nstack backtrace:\n");
1737 dump_stack();
1738
1739 return 0;
1740}
1741
1742/*
1743 * Check whether we are holding such a class already.
1744 *
1745 * (Note that this has to be done separately, because the graph cannot
1746 * detect such classes of deadlocks.)
1747 *
1748 * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
1749 */
1750static int
1751check_deadlock(struct task_struct *curr, struct held_lock *next,
1752 struct lockdep_map *next_instance, int read)
1753{
1754 struct held_lock *prev;
1755 struct held_lock *nest = NULL;
1756 int i;
1757
1758 for (i = 0; i < curr->lockdep_depth; i++) {
1759 prev = curr->held_locks + i;
1760
1761 if (prev->instance == next->nest_lock)
1762 nest = prev;
1763
1764 if (hlock_class(prev) != hlock_class(next))
1765 continue;
1766
1767 /*
1768 * Allow read-after-read recursion of the same
1769 * lock class (i.e. read_lock(lock)+read_lock(lock)):
1770 */
1771 if ((read == 2) && prev->read)
1772 return 2;
1773
1774 /*
1775 * We're holding the nest_lock, which serializes this lock's
1776 * nesting behaviour.
1777 */
1778 if (nest)
1779 return 2;
1780
1781 return print_deadlock_bug(curr, prev, next);
1782 }
1783 return 1;
1784}
1785
1786/*
1787 * There was a chain-cache miss, and we are about to add a new dependency
1788 * to a previous lock. We recursively validate the following rules:
1789 *
1790 * - would the adding of the <prev> -> <next> dependency create a
1791 * circular dependency in the graph? [== circular deadlock]
1792 *
1793 * - does the new prev->next dependency connect any hardirq-safe lock
1794 * (in the full backwards-subgraph starting at <prev>) with any
1795 * hardirq-unsafe lock (in the full forwards-subgraph starting at
1796 * <next>)? [== illegal lock inversion with hardirq contexts]
1797 *
1798 * - does the new prev->next dependency connect any softirq-safe lock
1799 * (in the full backwards-subgraph starting at <prev>) with any
1800 * softirq-unsafe lock (in the full forwards-subgraph starting at
1801 * <next>)? [== illegal lock inversion with softirq contexts]
1802 *
1803 * any of these scenarios could lead to a deadlock.
1804 *
1805 * Then if all the validations pass, we add the forwards and backwards
1806 * dependency.
1807 */
1808static int
1809check_prev_add(struct task_struct *curr, struct held_lock *prev,
1810 struct held_lock *next, int distance, int trylock_loop)
1811{
1812 struct lock_list *entry;
1813 int ret;
1814 struct lock_list this;
1815 struct lock_list *uninitialized_var(target_entry);
1816 /*
1817 * Static variable, serialized by the graph_lock().
1818 *
1819 * We use this static variable to save the stack trace in case
1820 * we call into this function multiple times due to encountering
1821 * trylocks in the held lock stack.
1822 */
1823 static struct stack_trace trace;
1824
1825 /*
1826 * Prove that the new <prev> -> <next> dependency would not
1827 * create a circular dependency in the graph. (We do this by
1828 * forward-recursing into the graph starting at <next>, and
1829 * checking whether we can reach <prev>.)
1830 *
1831 * We are using global variables to control the recursion, to
1832 * keep the stackframe size of the recursive functions low:
1833 */
1834 this.class = hlock_class(next);
1835 this.parent = NULL;
1836 ret = check_noncircular(&this, hlock_class(prev), &target_entry);
1837 if (unlikely(!ret))
1838 return print_circular_bug(&this, target_entry, next, prev);
1839 else if (unlikely(ret < 0))
1840 return print_bfs_bug(ret);
1841
1842 if (!check_prev_add_irq(curr, prev, next))
1843 return 0;
1844
1845 /*
1846 * For recursive read-locks we do all the dependency checks,
1847 * but we dont store read-triggered dependencies (only
1848 * write-triggered dependencies). This ensures that only the
1849 * write-side dependencies matter, and that if for example a
1850 * write-lock never takes any other locks, then the reads are
1851 * equivalent to a NOP.
1852 */
1853 if (next->read == 2 || prev->read == 2)
1854 return 1;
1855 /*
1856 * Is the <prev> -> <next> dependency already present?
1857 *
1858 * (this may occur even though this is a new chain: consider
1859 * e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3
1860 * chains - the second one will be new, but L1 already has
1861 * L2 added to its dependency list, due to the first chain.)
1862 */
1863 list_for_each_entry(entry, &hlock_class(prev)->locks_after, entry) {
1864 if (entry->class == hlock_class(next)) {
1865 if (distance == 1)
1866 entry->distance = 1;
1867 return 2;
1868 }
1869 }
1870
1871 if (!trylock_loop && !save_trace(&trace))
1872 return 0;
1873
1874 /*
1875 * Ok, all validations passed, add the new lock
1876 * to the previous lock's dependency list:
1877 */
1878 ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
1879 &hlock_class(prev)->locks_after,
1880 next->acquire_ip, distance, &trace);
1881
1882 if (!ret)
1883 return 0;
1884
1885 ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
1886 &hlock_class(next)->locks_before,
1887 next->acquire_ip, distance, &trace);
1888 if (!ret)
1889 return 0;
1890
1891 /*
1892 * Debugging printouts:
1893 */
1894 if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
1895 graph_unlock();
1896 printk("\n new dependency: ");
1897 print_lock_name(hlock_class(prev));
1898 printk(" => ");
1899 print_lock_name(hlock_class(next));
1900 printk("\n");
1901 dump_stack();
1902 return graph_lock();
1903 }
1904 return 1;
1905}
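/*
 * Worked example (a sketch of what the check_noncircular() step above
 * catches; L1 and L2 are made-up classes): one context takes L1 then
 * L2, so check_prev_add() stores the edge L1 -> L2. If another context
 * later holds L2 and acquires L1 (prev = L2, next = L1), the forward
 * search starting at L1 follows the stored L1 -> L2 edge, reaches
 * hlock_class(prev) == L2, and print_circular_bug() reports the
 * classic AB-BA deadlock:
 *
 *	CPU0			CPU1
 *	----			----
 *	lock(L1);
 *				lock(L2);
 *				lock(L1);	blocks on CPU0
 *	lock(L2);		blocks on CPU1 -> deadlock
 */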
1906
1907/*
1908 * Add the dependency to all directly-previous locks that are 'relevant'.
1909 * The ones that are relevant are (in increasing distance from curr):
1910 * all consecutive trylock entries and the final non-trylock entry - or
1911 * the end of this context's lock-chain - whichever comes first.
1912 */
1913static int
1914check_prevs_add(struct task_struct *curr, struct held_lock *next)
1915{
1916 int depth = curr->lockdep_depth;
1917 int trylock_loop = 0;
1918 struct held_lock *hlock;
1919
1920 /*
1921 * Debugging checks.
1922 *
1923 * Depth must not be zero for a non-head lock:
1924 */
1925 if (!depth)
1926 goto out_bug;
1927 /*
1928 * At least two relevant locks must exist for this
1929 * to be a head:
1930 */
1931 if (curr->held_locks[depth].irq_context !=
1932 curr->held_locks[depth-1].irq_context)
1933 goto out_bug;
1934
1935 for (;;) {
1936 int distance = curr->lockdep_depth - depth + 1;
1937 hlock = curr->held_locks + depth-1;
1938 /*
1939 * Only non-recursive-read entries get new dependencies
1940 * added:
1941 */
1942 if (hlock->read != 2) {
1943 if (!check_prev_add(curr, hlock, next,
1944 distance, trylock_loop))
1945 return 0;
1946 /*
1947 * Stop after the first non-trylock entry,
1948 * as non-trylock entries have added their
1949 * own direct dependencies already, so this
1950 * lock is connected to them indirectly:
1951 */
1952 if (!hlock->trylock)
1953 break;
1954 }
1955 depth--;
1956 /*
1957 * End of lock-stack?
1958 */
1959 if (!depth)
1960 break;
1961 /*
1962 * Stop the search if we cross into another context:
1963 */
1964 if (curr->held_locks[depth].irq_context !=
1965 curr->held_locks[depth-1].irq_context)
1966 break;
1967 trylock_loop = 1;
1968 }
1969 return 1;
1970out_bug:
1971 if (!debug_locks_off_graph_unlock())
1972 return 0;
1973
1974 /*
1975 * Clearly we all shouldn't be here, but since we made it we
 1976 * can reliably say we messed up our state. See the above two
1977 * gotos for reasons why we could possibly end up here.
1978 */
1979 WARN_ON(1);
1980
1981 return 0;
1982}
1983
1984unsigned long nr_lock_chains;
1985struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
1986int nr_chain_hlocks;
1987static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS];
1988
1989struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
1990{
1991 return lock_classes + chain_hlocks[chain->base + i];
1992}
1993
1994/*
1995 * Look up a dependency chain. If the key is not present yet then
1996 * add it and return 1 - in this case the new dependency chain is
1997 * validated. If the key is already hashed, return 0.
1998 * (On return with 1 graph_lock is held.)
1999 */
2000static inline int lookup_chain_cache(struct task_struct *curr,
2001 struct held_lock *hlock,
2002 u64 chain_key)
2003{
2004 struct lock_class *class = hlock_class(hlock);
2005 struct list_head *hash_head = chainhashentry(chain_key);
2006 struct lock_chain *chain;
2007 struct held_lock *hlock_curr;
2008 int i, j;
2009
2010 /*
2011 * We might need to take the graph lock, ensure we've got IRQs
2012 * disabled to make this an IRQ-safe lock.. for recursion reasons
2013 * lockdep won't complain about its own locking errors.
2014 */
2015 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2016 return 0;
2017 /*
2018 * We can walk it lock-free, because entries only get added
2019 * to the hash:
2020 */
2021 list_for_each_entry(chain, hash_head, entry) {
2022 if (chain->chain_key == chain_key) {
2023cache_hit:
2024 debug_atomic_inc(chain_lookup_hits);
2025 if (very_verbose(class))
2026 printk("\nhash chain already cached, key: "
2027 "%016Lx tail class: [%p] %s\n",
2028 (unsigned long long)chain_key,
2029 class->key, class->name);
2030 return 0;
2031 }
2032 }
2033 if (very_verbose(class))
2034 printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n",
2035 (unsigned long long)chain_key, class->key, class->name);
2036 /*
2037 * Allocate a new chain entry from the static array, and add
2038 * it to the hash:
2039 */
2040 if (!graph_lock())
2041 return 0;
2042 /*
2043 * We have to walk the chain again locked - to avoid duplicates:
2044 */
2045 list_for_each_entry(chain, hash_head, entry) {
2046 if (chain->chain_key == chain_key) {
2047 graph_unlock();
2048 goto cache_hit;
2049 }
2050 }
2051 if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
2052 if (!debug_locks_off_graph_unlock())
2053 return 0;
2054
2055 print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
2056 dump_stack();
2057 return 0;
2058 }
2059 chain = lock_chains + nr_lock_chains++;
2060 chain->chain_key = chain_key;
2061 chain->irq_context = hlock->irq_context;
2062 /* Find the first held_lock of current chain */
2063 for (i = curr->lockdep_depth - 1; i >= 0; i--) {
2064 hlock_curr = curr->held_locks + i;
2065 if (hlock_curr->irq_context != hlock->irq_context)
2066 break;
2067 }
2068 i++;
2069 chain->depth = curr->lockdep_depth + 1 - i;
2070 if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
2071 chain->base = nr_chain_hlocks;
2072 nr_chain_hlocks += chain->depth;
2073 for (j = 0; j < chain->depth - 1; j++, i++) {
2074 int lock_id = curr->held_locks[i].class_idx - 1;
2075 chain_hlocks[chain->base + j] = lock_id;
2076 }
2077 chain_hlocks[chain->base + j] = class - lock_classes;
2078 }
2079 list_add_tail_rcu(&chain->entry, hash_head);
2080 debug_atomic_inc(chain_lookup_misses);
2081 inc_chains();
2082
2083 return 1;
2084}
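/*
 * Sketch of the chain cache (informal; A, B, C are made-up classes):
 * the key for a held-lock stack is built incrementally in
 * __lock_acquire() by folding each class index into the previous key,
 * e.g. for a process-context stack A -> B -> C:
 *
 *	key0 = iterate_chain_key(0,    id(A));
 *	key1 = iterate_chain_key(key0, id(B));
 *	key2 = iterate_chain_key(key1, id(C));
 *
 * The first time key2 is seen, lookup_chain_cache() records the chain
 * (depth 3, the class ids stored in chain_hlocks[]) and returns 1 so
 * that validate_chain() runs the full checks; every later A -> B -> C
 * sequence hits the cache and returns 0, skipping the O(N^2) work.
 */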
2085
2086static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
2087 struct held_lock *hlock, int chain_head, u64 chain_key)
2088{
2089 /*
2090 * Trylock needs to maintain the stack of held locks, but it
2091 * does not add new dependencies, because trylock can be done
2092 * in any order.
2093 *
2094 * We look up the chain_key and do the O(N^2) check and update of
2095 * the dependencies only if this is a new dependency chain.
2096 * (If lookup_chain_cache() returns with 1 it acquires
2097 * graph_lock for us)
2098 */
2099 if (!hlock->trylock && (hlock->check == 2) &&
2100 lookup_chain_cache(curr, hlock, chain_key)) {
2101 /*
2102 * Check whether last held lock:
2103 *
2104 * - is irq-safe, if this lock is irq-unsafe
 2105 * - is softirq-safe, if this lock is softirq-unsafe
2106 *
2107 * And check whether the new lock's dependency graph
2108 * could lead back to the previous lock.
2109 *
 2110 * any of these scenarios could lead to a deadlock. If all
 2111 * validations pass, check_prevs_add() adds the new dependencies.
2112 */
2113 int ret = check_deadlock(curr, hlock, lock, hlock->read);
2114
2115 if (!ret)
2116 return 0;
2117 /*
2118 * Mark recursive read, as we jump over it when
2119 * building dependencies (just like we jump over
2120 * trylock entries):
2121 */
2122 if (ret == 2)
2123 hlock->read = 2;
2124 /*
2125 * Add dependency only if this lock is not the head
2126 * of the chain, and if it's not a secondary read-lock:
2127 */
2128 if (!chain_head && ret != 2)
2129 if (!check_prevs_add(curr, hlock))
2130 return 0;
2131 graph_unlock();
2132 } else
2133 /* after lookup_chain_cache(): */
2134 if (unlikely(!debug_locks))
2135 return 0;
2136
2137 return 1;
2138}
2139#else
2140static inline int validate_chain(struct task_struct *curr,
2141 struct lockdep_map *lock, struct held_lock *hlock,
2142 int chain_head, u64 chain_key)
2143{
2144 return 1;
2145}
2146#endif
2147
2148/*
2149 * We are building curr_chain_key incrementally, so double-check
2150 * it from scratch, to make sure that it's done correctly:
2151 */
2152static void check_chain_key(struct task_struct *curr)
2153{
2154#ifdef CONFIG_DEBUG_LOCKDEP
2155 struct held_lock *hlock, *prev_hlock = NULL;
2156 unsigned int i, id;
2157 u64 chain_key = 0;
2158
2159 for (i = 0; i < curr->lockdep_depth; i++) {
2160 hlock = curr->held_locks + i;
2161 if (chain_key != hlock->prev_chain_key) {
2162 debug_locks_off();
2163 /*
2164 * We got mighty confused, our chain keys don't match
 2165 * with what we expect; did someone trample on our task state?
2166 */
2167 WARN(1, "hm#1, depth: %u [%u], %016Lx != %016Lx\n",
2168 curr->lockdep_depth, i,
2169 (unsigned long long)chain_key,
2170 (unsigned long long)hlock->prev_chain_key);
2171 return;
2172 }
2173 id = hlock->class_idx - 1;
2174 /*
2175 * Whoops ran out of static storage again?
2176 */
2177 if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
2178 return;
2179
2180 if (prev_hlock && (prev_hlock->irq_context !=
2181 hlock->irq_context))
2182 chain_key = 0;
2183 chain_key = iterate_chain_key(chain_key, id);
2184 prev_hlock = hlock;
2185 }
2186 if (chain_key != curr->curr_chain_key) {
2187 debug_locks_off();
2188 /*
2189 * More smoking hash instead of calculating it, damn see these
2190 * numbers float.. I bet that a pink elephant stepped on my memory.
2191 */
2192 WARN(1, "hm#2, depth: %u [%u], %016Lx != %016Lx\n",
2193 curr->lockdep_depth, i,
2194 (unsigned long long)chain_key,
2195 (unsigned long long)curr->curr_chain_key);
2196 }
2197#endif
2198}
2199
2200static void
2201print_usage_bug_scenario(struct held_lock *lock)
2202{
2203 struct lock_class *class = hlock_class(lock);
2204
2205 printk(" Possible unsafe locking scenario:\n\n");
2206 printk(" CPU0\n");
2207 printk(" ----\n");
2208 printk(" lock(");
2209 __print_lock_name(class);
2210 printk(");\n");
2211 printk(" <Interrupt>\n");
2212 printk(" lock(");
2213 __print_lock_name(class);
2214 printk(");\n");
2215 printk("\n *** DEADLOCK ***\n\n");
2216}
2217
2218static int
2219print_usage_bug(struct task_struct *curr, struct held_lock *this,
2220 enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
2221{
2222 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2223 return 0;
2224
2225 printk("\n");
2226 printk("=================================\n");
2227 printk("[ INFO: inconsistent lock state ]\n");
2228 print_kernel_ident();
2229 printk("---------------------------------\n");
2230
2231 printk("inconsistent {%s} -> {%s} usage.\n",
2232 usage_str[prev_bit], usage_str[new_bit]);
2233
2234 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
2235 curr->comm, task_pid_nr(curr),
2236 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
2237 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
2238 trace_hardirqs_enabled(curr),
2239 trace_softirqs_enabled(curr));
2240 print_lock(this);
2241
2242 printk("{%s} state was registered at:\n", usage_str[prev_bit]);
2243 print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1);
2244
2245 print_irqtrace_events(curr);
2246 printk("\nother info that might help us debug this:\n");
2247 print_usage_bug_scenario(this);
2248
2249 lockdep_print_held_locks(curr);
2250
2251 printk("\nstack backtrace:\n");
2252 dump_stack();
2253
2254 return 0;
2255}
2256
2257/*
2258 * Print out an error if an invalid bit is set:
2259 */
2260static inline int
2261valid_state(struct task_struct *curr, struct held_lock *this,
2262 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
2263{
2264 if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit)))
2265 return print_usage_bug(curr, this, bad_bit, new_bit);
2266 return 1;
2267}
2268
2269static int mark_lock(struct task_struct *curr, struct held_lock *this,
2270 enum lock_usage_bit new_bit);
2271
2272#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
2273
2274/*
2275 * print irq inversion bug:
2276 */
2277static int
2278print_irq_inversion_bug(struct task_struct *curr,
2279 struct lock_list *root, struct lock_list *other,
2280 struct held_lock *this, int forwards,
2281 const char *irqclass)
2282{
2283 struct lock_list *entry = other;
2284 struct lock_list *middle = NULL;
2285 int depth;
2286
2287 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2288 return 0;
2289
2290 printk("\n");
2291 printk("=========================================================\n");
2292 printk("[ INFO: possible irq lock inversion dependency detected ]\n");
2293 print_kernel_ident();
2294 printk("---------------------------------------------------------\n");
2295 printk("%s/%d just changed the state of lock:\n",
2296 curr->comm, task_pid_nr(curr));
2297 print_lock(this);
2298 if (forwards)
2299 printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
2300 else
2301 printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
2302 print_lock_name(other->class);
2303 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
2304
2305 printk("\nother info that might help us debug this:\n");
2306
2307 /* Find a middle lock (if one exists) */
2308 depth = get_lock_depth(other);
2309 do {
2310 if (depth == 0 && (entry != root)) {
2311 printk("lockdep:%s bad path found in chain graph\n", __func__);
2312 break;
2313 }
2314 middle = entry;
2315 entry = get_lock_parent(entry);
2316 depth--;
2317 } while (entry && entry != root && (depth >= 0));
2318 if (forwards)
2319 print_irq_lock_scenario(root, other,
2320 middle ? middle->class : root->class, other->class);
2321 else
2322 print_irq_lock_scenario(other, root,
2323 middle ? middle->class : other->class, root->class);
2324
2325 lockdep_print_held_locks(curr);
2326
2327 printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
2328 if (!save_trace(&root->trace))
2329 return 0;
2330 print_shortest_lock_dependencies(other, root);
2331
2332 printk("\nstack backtrace:\n");
2333 dump_stack();
2334
2335 return 0;
2336}
2337
2338/*
2339 * Prove that in the forwards-direction subgraph starting at <this>
2340 * there is no lock matching <mask>:
2341 */
2342static int
2343check_usage_forwards(struct task_struct *curr, struct held_lock *this,
2344 enum lock_usage_bit bit, const char *irqclass)
2345{
2346 int ret;
2347 struct lock_list root;
2348 struct lock_list *uninitialized_var(target_entry);
2349
2350 root.parent = NULL;
2351 root.class = hlock_class(this);
2352 ret = find_usage_forwards(&root, bit, &target_entry);
2353 if (ret < 0)
2354 return print_bfs_bug(ret);
2355 if (ret == 1)
2356 return ret;
2357
2358 return print_irq_inversion_bug(curr, &root, target_entry,
2359 this, 1, irqclass);
2360}
2361
2362/*
2363 * Prove that in the backwards-direction subgraph starting at <this>
2364 * there is no lock matching <mask>:
2365 */
2366static int
2367check_usage_backwards(struct task_struct *curr, struct held_lock *this,
2368 enum lock_usage_bit bit, const char *irqclass)
2369{
2370 int ret;
2371 struct lock_list root;
2372 struct lock_list *uninitialized_var(target_entry);
2373
2374 root.parent = NULL;
2375 root.class = hlock_class(this);
2376 ret = find_usage_backwards(&root, bit, &target_entry);
2377 if (ret < 0)
2378 return print_bfs_bug(ret);
2379 if (ret == 1)
2380 return ret;
2381
2382 return print_irq_inversion_bug(curr, &root, target_entry,
2383 this, 0, irqclass);
2384}
2385
2386void print_irqtrace_events(struct task_struct *curr)
2387{
2388 printk("irq event stamp: %u\n", curr->irq_events);
2389 printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event);
2390 print_ip_sym(curr->hardirq_enable_ip);
2391 printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event);
2392 print_ip_sym(curr->hardirq_disable_ip);
2393 printk("softirqs last enabled at (%u): ", curr->softirq_enable_event);
2394 print_ip_sym(curr->softirq_enable_ip);
2395 printk("softirqs last disabled at (%u): ", curr->softirq_disable_event);
2396 print_ip_sym(curr->softirq_disable_ip);
2397}
2398
2399static int HARDIRQ_verbose(struct lock_class *class)
2400{
2401#if HARDIRQ_VERBOSE
2402 return class_filter(class);
2403#endif
2404 return 0;
2405}
2406
2407static int SOFTIRQ_verbose(struct lock_class *class)
2408{
2409#if SOFTIRQ_VERBOSE
2410 return class_filter(class);
2411#endif
2412 return 0;
2413}
2414
2415static int RECLAIM_FS_verbose(struct lock_class *class)
2416{
2417#if RECLAIM_VERBOSE
2418 return class_filter(class);
2419#endif
2420 return 0;
2421}
2422
2423#define STRICT_READ_CHECKS 1
2424
2425static int (*state_verbose_f[])(struct lock_class *class) = {
2426#define LOCKDEP_STATE(__STATE) \
2427 __STATE##_verbose,
2428#include "lockdep_states.h"
2429#undef LOCKDEP_STATE
2430};
2431
2432static inline int state_verbose(enum lock_usage_bit bit,
2433 struct lock_class *class)
2434{
2435 return state_verbose_f[bit >> 2](class);
2436}
2437
2438typedef int (*check_usage_f)(struct task_struct *, struct held_lock *,
2439 enum lock_usage_bit bit, const char *name);
2440
2441static int
2442mark_lock_irq(struct task_struct *curr, struct held_lock *this,
2443 enum lock_usage_bit new_bit)
2444{
2445 int excl_bit = exclusive_bit(new_bit);
2446 int read = new_bit & 1;
2447 int dir = new_bit & 2;
2448
2449 /*
2450 * mark USED_IN has to look forwards -- to ensure no dependency
2451 * has ENABLED state, which would allow recursion deadlocks.
2452 *
2453 * mark ENABLED has to look backwards -- to ensure no dependee
2454 * has USED_IN state, which, again, would allow recursion deadlocks.
2455 */
2456 check_usage_f usage = dir ?
2457 check_usage_backwards : check_usage_forwards;
2458
2459 /*
2460 * Validate that this particular lock does not have conflicting
2461 * usage states.
2462 */
2463 if (!valid_state(curr, this, new_bit, excl_bit))
2464 return 0;
2465
2466 /*
2467 * Validate that the lock dependencies don't have conflicting usage
2468 * states.
2469 */
2470 if ((!read || !dir || STRICT_READ_CHECKS) &&
2471 !usage(curr, this, excl_bit, state_name(new_bit & ~1)))
2472 return 0;
2473
2474 /*
 2475 * For a write usage, also check against the read variant of the conflicting bit:
2476 */
2477 if (!read) {
2478 if (!valid_state(curr, this, new_bit, excl_bit + 1))
2479 return 0;
2480
2481 if (STRICT_READ_CHECKS &&
2482 !usage(curr, this, excl_bit + 1,
2483 state_name(new_bit + 1)))
2484 return 0;
2485 }
2486
2487 if (state_verbose(new_bit, hlock_class(this)))
2488 return 2;
2489
2490 return 1;
2491}
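/*
 * Scenario the two search directions above guard against (a minimal
 * sketch; A and B are made-up classes): A is LOCK_USED_IN_HARDIRQ
 * because an interrupt handler takes it, B is LOCK_ENABLED_HARDIRQ
 * because it is taken with hardirqs enabled. Once a dependency
 * A -> B exists, this can deadlock:
 *
 *	CPU0				CPU1
 *	----				----
 *	lock(B);			local_irq_disable();
 *					lock(A);
 *					lock(B);	waits for CPU0
 *	<hardirq>
 *	  lock(A);	waits for CPU1
 *
 * Hence marking USED_IN searches forwards for reachable ENABLED
 * classes, and marking ENABLED searches backwards for USED_IN ones.
 */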
2492
2493enum mark_type {
2494#define LOCKDEP_STATE(__STATE) __STATE,
2495#include "lockdep_states.h"
2496#undef LOCKDEP_STATE
2497};
2498
2499/*
2500 * Mark all held locks with a usage bit:
2501 */
2502static int
2503mark_held_locks(struct task_struct *curr, enum mark_type mark)
2504{
2505 enum lock_usage_bit usage_bit;
2506 struct held_lock *hlock;
2507 int i;
2508
2509 for (i = 0; i < curr->lockdep_depth; i++) {
2510 hlock = curr->held_locks + i;
2511
2512 usage_bit = 2 + (mark << 2); /* ENABLED */
2513 if (hlock->read)
2514 usage_bit += 1; /* READ */
2515
2516 BUG_ON(usage_bit >= LOCK_USAGE_STATES);
2517
2518 if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys)
2519 continue;
2520
2521 if (!mark_lock(curr, hlock, usage_bit))
2522 return 0;
2523 }
2524
2525 return 1;
2526}
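/*
 * Bit layout implied by the arithmetic here and in mark_lock_irq()
 * (new_bit & 1, new_bit & 2, bit >> 2): each state generated from
 * lockdep_states.h occupies four consecutive usage bits,
 *
 *	4*mark + 0:  LOCK_USED_IN_<STATE>
 *	4*mark + 1:  LOCK_USED_IN_<STATE>_READ
 *	4*mark + 2:  LOCK_ENABLED_<STATE>
 *	4*mark + 3:  LOCK_ENABLED_<STATE>_READ
 *
 * so "2 + (mark << 2)" picks the ENABLED bit for the state being
 * marked and the "+ 1" selects its _READ variant for read-held locks.
 */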
2527
2528/*
2529 * Hardirqs will be enabled:
2530 */
2531static void __trace_hardirqs_on_caller(unsigned long ip)
2532{
2533 struct task_struct *curr = current;
2534
2535 /* we'll do an OFF -> ON transition: */
2536 curr->hardirqs_enabled = 1;
2537
2538 /*
2539 * We are going to turn hardirqs on, so set the
2540 * usage bit for all held locks:
2541 */
2542 if (!mark_held_locks(curr, HARDIRQ))
2543 return;
2544 /*
2545 * If we have softirqs enabled, then set the usage
2546 * bit for all held locks. (disabled hardirqs prevented
2547 * this bit from being set before)
2548 */
2549 if (curr->softirqs_enabled)
2550 if (!mark_held_locks(curr, SOFTIRQ))
2551 return;
2552
2553 curr->hardirq_enable_ip = ip;
2554 curr->hardirq_enable_event = ++curr->irq_events;
2555 debug_atomic_inc(hardirqs_on_events);
2556}
2557
2558void trace_hardirqs_on_caller(unsigned long ip)
2559{
2560 time_hardirqs_on(CALLER_ADDR0, ip);
2561
2562 if (unlikely(!debug_locks || current->lockdep_recursion))
2563 return;
2564
2565 if (unlikely(current->hardirqs_enabled)) {
2566 /*
2567 * Neither irq nor preemption are disabled here
2568 * so this is racy by nature but losing one hit
2569 * in a stat is not a big deal.
2570 */
2571 __debug_atomic_inc(redundant_hardirqs_on);
2572 return;
2573 }
2574
2575 /*
2576 * We're enabling irqs and according to our state above irqs weren't
2577 * already enabled, yet we find the hardware thinks they are in fact
2578 * enabled.. someone messed up their IRQ state tracing.
2579 */
2580 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2581 return;
2582
2583 /*
2584 * See the fine text that goes along with this variable definition.
2585 */
2586 if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
2587 return;
2588
2589 /*
2590 * Can't allow enabling interrupts while in an interrupt handler,
2591 * that's general bad form and such. Recursion, limited stack etc..
2592 */
2593 if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
2594 return;
2595
2596 current->lockdep_recursion = 1;
2597 __trace_hardirqs_on_caller(ip);
2598 current->lockdep_recursion = 0;
2599}
2600EXPORT_SYMBOL(trace_hardirqs_on_caller);
2601
2602void trace_hardirqs_on(void)
2603{
2604 trace_hardirqs_on_caller(CALLER_ADDR0);
2605}
2606EXPORT_SYMBOL(trace_hardirqs_on);
2607
2608/*
2609 * Hardirqs were disabled:
2610 */
2611void trace_hardirqs_off_caller(unsigned long ip)
2612{
2613 struct task_struct *curr = current;
2614
2615 time_hardirqs_off(CALLER_ADDR0, ip);
2616
2617 if (unlikely(!debug_locks || current->lockdep_recursion))
2618 return;
2619
2620 /*
2621 * So we're supposed to get called after you mask local IRQs, but for
2622 * some reason the hardware doesn't quite think you did a proper job.
2623 */
2624 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2625 return;
2626
2627 if (curr->hardirqs_enabled) {
2628 /*
2629 * We have done an ON -> OFF transition:
2630 */
2631 curr->hardirqs_enabled = 0;
2632 curr->hardirq_disable_ip = ip;
2633 curr->hardirq_disable_event = ++curr->irq_events;
2634 debug_atomic_inc(hardirqs_off_events);
2635 } else
2636 debug_atomic_inc(redundant_hardirqs_off);
2637}
2638EXPORT_SYMBOL(trace_hardirqs_off_caller);
2639
2640void trace_hardirqs_off(void)
2641{
2642 trace_hardirqs_off_caller(CALLER_ADDR0);
2643}
2644EXPORT_SYMBOL(trace_hardirqs_off);
2645
2646/*
2647 * Softirqs will be enabled:
2648 */
2649void trace_softirqs_on(unsigned long ip)
2650{
2651 struct task_struct *curr = current;
2652
2653 if (unlikely(!debug_locks || current->lockdep_recursion))
2654 return;
2655
2656 /*
2657 * We fancy IRQs being disabled here, see softirq.c, avoids
2658 * funny state and nesting things.
2659 */
2660 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2661 return;
2662
2663 if (curr->softirqs_enabled) {
2664 debug_atomic_inc(redundant_softirqs_on);
2665 return;
2666 }
2667
2668 current->lockdep_recursion = 1;
2669 /*
2670 * We'll do an OFF -> ON transition:
2671 */
2672 curr->softirqs_enabled = 1;
2673 curr->softirq_enable_ip = ip;
2674 curr->softirq_enable_event = ++curr->irq_events;
2675 debug_atomic_inc(softirqs_on_events);
2676 /*
2677 * We are going to turn softirqs on, so set the
2678 * usage bit for all held locks, if hardirqs are
2679 * enabled too:
2680 */
2681 if (curr->hardirqs_enabled)
2682 mark_held_locks(curr, SOFTIRQ);
2683 current->lockdep_recursion = 0;
2684}
2685
2686/*
2687 * Softirqs were disabled:
2688 */
2689void trace_softirqs_off(unsigned long ip)
2690{
2691 struct task_struct *curr = current;
2692
2693 if (unlikely(!debug_locks || current->lockdep_recursion))
2694 return;
2695
2696 /*
2697 * We fancy IRQs being disabled here, see softirq.c
2698 */
2699 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2700 return;
2701
2702 if (curr->softirqs_enabled) {
2703 /*
2704 * We have done an ON -> OFF transition:
2705 */
2706 curr->softirqs_enabled = 0;
2707 curr->softirq_disable_ip = ip;
2708 curr->softirq_disable_event = ++curr->irq_events;
2709 debug_atomic_inc(softirqs_off_events);
2710 /*
2711 * Whoops, we wanted softirqs off, so why aren't they?
2712 */
2713 DEBUG_LOCKS_WARN_ON(!softirq_count());
2714 } else
2715 debug_atomic_inc(redundant_softirqs_off);
2716}
2717
2718static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
2719{
2720 struct task_struct *curr = current;
2721
2722 if (unlikely(!debug_locks))
2723 return;
2724
2725 /* no reclaim without waiting on it */
2726 if (!(gfp_mask & __GFP_WAIT))
2727 return;
2728
2729 /* this guy won't enter reclaim */
2730 if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
2731 return;
2732
 2733 /* We're only interested in __GFP_FS allocations for now */
2734 if (!(gfp_mask & __GFP_FS))
2735 return;
2736
2737 /*
2738 * Oi! Can't be having __GFP_FS allocations with IRQs disabled.
2739 */
2740 if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)))
2741 return;
2742
2743 mark_held_locks(curr, RECLAIM_FS);
2744}
2745
2746static void check_flags(unsigned long flags);
2747
2748void lockdep_trace_alloc(gfp_t gfp_mask)
2749{
2750 unsigned long flags;
2751
2752 if (unlikely(current->lockdep_recursion))
2753 return;
2754
2755 raw_local_irq_save(flags);
2756 check_flags(flags);
2757 current->lockdep_recursion = 1;
2758 __lockdep_trace_alloc(gfp_mask, flags);
2759 current->lockdep_recursion = 0;
2760 raw_local_irq_restore(flags);
2761}
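/*
 * The deadlock pattern this annotation is after (a rough sketch;
 * fs_lock is a made-up filesystem lock): a __GFP_FS allocation may
 * enter direct reclaim and call back into the filesystem, so holding
 * an fs-side lock across such an allocation can self-deadlock:
 *
 *	lock(fs_lock);
 *	kmalloc(size, GFP_KERNEL);	GFP_KERNEL includes __GFP_FS
 *	  -> direct reclaim
 *	    -> filesystem writeback/shrinker
 *	      -> lock(fs_lock);		recursion on fs_lock
 *
 * mark_held_locks(curr, RECLAIM_FS) above marks every lock held across
 * the allocation as ENABLED_RECLAIM_FS; locks taken from within
 * reclaim get USED_IN_RECLAIM_FS via mark_irqflags(), and connecting
 * the two is reported just like a hardirq/softirq inversion.
 */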
2762
2763static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
2764{
2765 /*
2766 * If non-trylock use in a hardirq or softirq context, then
2767 * mark the lock as used in these contexts:
2768 */
2769 if (!hlock->trylock) {
2770 if (hlock->read) {
2771 if (curr->hardirq_context)
2772 if (!mark_lock(curr, hlock,
2773 LOCK_USED_IN_HARDIRQ_READ))
2774 return 0;
2775 if (curr->softirq_context)
2776 if (!mark_lock(curr, hlock,
2777 LOCK_USED_IN_SOFTIRQ_READ))
2778 return 0;
2779 } else {
2780 if (curr->hardirq_context)
2781 if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ))
2782 return 0;
2783 if (curr->softirq_context)
2784 if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ))
2785 return 0;
2786 }
2787 }
2788 if (!hlock->hardirqs_off) {
2789 if (hlock->read) {
2790 if (!mark_lock(curr, hlock,
2791 LOCK_ENABLED_HARDIRQ_READ))
2792 return 0;
2793 if (curr->softirqs_enabled)
2794 if (!mark_lock(curr, hlock,
2795 LOCK_ENABLED_SOFTIRQ_READ))
2796 return 0;
2797 } else {
2798 if (!mark_lock(curr, hlock,
2799 LOCK_ENABLED_HARDIRQ))
2800 return 0;
2801 if (curr->softirqs_enabled)
2802 if (!mark_lock(curr, hlock,
2803 LOCK_ENABLED_SOFTIRQ))
2804 return 0;
2805 }
2806 }
2807
2808 /*
2809 * We reuse the irq context infrastructure more broadly as a general
2810 * context checking code. This tests GFP_FS recursion (a lock taken
2811 * during reclaim for a GFP_FS allocation is held over a GFP_FS
2812 * allocation).
2813 */
2814 if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) {
2815 if (hlock->read) {
2816 if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ))
2817 return 0;
2818 } else {
2819 if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS))
2820 return 0;
2821 }
2822 }
2823
2824 return 1;
2825}
2826
2827static int separate_irq_context(struct task_struct *curr,
2828 struct held_lock *hlock)
2829{
2830 unsigned int depth = curr->lockdep_depth;
2831
2832 /*
2833 * Keep track of points where we cross into an interrupt context:
2834 */
2835 hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
2836 curr->softirq_context;
2837 if (depth) {
2838 struct held_lock *prev_hlock;
2839
2840 prev_hlock = curr->held_locks + depth-1;
2841 /*
2842 * If we cross into another context, reset the
2843 * hash key (this also prevents the checking and the
2844 * adding of the dependency to 'prev'):
2845 */
2846 if (prev_hlock->irq_context != hlock->irq_context)
2847 return 1;
2848 }
2849 return 0;
2850}
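/*
 * Resulting hlock->irq_context values (worked out from the formula
 * above, assuming the usual 0/1 context counters):
 *
 *	process context:                 2*0 + 0 == 0
 *	softirq context:                 2*0 + 1 == 1
 *	hardirq context:                 2*1 + 0 == 2
 *	hardirq interrupting a softirq:  2*1 + 1 == 3
 *
 * A change in this value between adjacent held locks is what makes
 * check_prevs_add() and lookup_chain_cache() split the stack into
 * separate chains.
 */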
2851
2852#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
2853
2854static inline
2855int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
2856 enum lock_usage_bit new_bit)
2857{
 2858 WARN_ON(1); /* Impossible, innit? We don't have TRACE_IRQFLAGS */
2859 return 1;
2860}
2861
2862static inline int mark_irqflags(struct task_struct *curr,
2863 struct held_lock *hlock)
2864{
2865 return 1;
2866}
2867
2868static inline int separate_irq_context(struct task_struct *curr,
2869 struct held_lock *hlock)
2870{
2871 return 0;
2872}
2873
2874void lockdep_trace_alloc(gfp_t gfp_mask)
2875{
2876}
2877
2878#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
2879
2880/*
2881 * Mark a lock with a usage bit, and validate the state transition:
2882 */
2883static int mark_lock(struct task_struct *curr, struct held_lock *this,
2884 enum lock_usage_bit new_bit)
2885{
2886 unsigned int new_mask = 1 << new_bit, ret = 1;
2887
2888 /*
2889 * If already set then do not dirty the cacheline,
2890 * nor do any checks:
2891 */
2892 if (likely(hlock_class(this)->usage_mask & new_mask))
2893 return 1;
2894
2895 if (!graph_lock())
2896 return 0;
2897 /*
2898 * Make sure we didn't race:
2899 */
2900 if (unlikely(hlock_class(this)->usage_mask & new_mask)) {
2901 graph_unlock();
2902 return 1;
2903 }
2904
2905 hlock_class(this)->usage_mask |= new_mask;
2906
2907 if (!save_trace(hlock_class(this)->usage_traces + new_bit))
2908 return 0;
2909
2910 switch (new_bit) {
2911#define LOCKDEP_STATE(__STATE) \
2912 case LOCK_USED_IN_##__STATE: \
2913 case LOCK_USED_IN_##__STATE##_READ: \
2914 case LOCK_ENABLED_##__STATE: \
2915 case LOCK_ENABLED_##__STATE##_READ:
2916#include "lockdep_states.h"
2917#undef LOCKDEP_STATE
2918 ret = mark_lock_irq(curr, this, new_bit);
2919 if (!ret)
2920 return 0;
2921 break;
2922 case LOCK_USED:
2923 debug_atomic_dec(nr_unused_locks);
2924 break;
2925 default:
2926 if (!debug_locks_off_graph_unlock())
2927 return 0;
2928 WARN_ON(1);
2929 return 0;
2930 }
2931
2932 graph_unlock();
2933
2934 /*
2935 * We must printk outside of the graph_lock:
2936 */
2937 if (ret == 2) {
2938 printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
2939 print_lock(this);
2940 print_irqtrace_events(curr);
2941 dump_stack();
2942 }
2943
2944 return ret;
2945}
2946
2947/*
2948 * Initialize a lock instance's lock-class mapping info:
2949 */
2950void lockdep_init_map(struct lockdep_map *lock, const char *name,
2951 struct lock_class_key *key, int subclass)
2952{
2953 int i;
2954
2955 kmemcheck_mark_initialized(lock, sizeof(*lock));
2956
2957 for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++)
2958 lock->class_cache[i] = NULL;
2959
2960#ifdef CONFIG_LOCK_STAT
2961 lock->cpu = raw_smp_processor_id();
2962#endif
2963
2964 /*
2965 * Can't be having no nameless bastards around this place!
2966 */
2967 if (DEBUG_LOCKS_WARN_ON(!name)) {
2968 lock->name = "NULL";
2969 return;
2970 }
2971
2972 lock->name = name;
2973
2974 /*
2975 * No key, no joy, we need to hash something.
2976 */
2977 if (DEBUG_LOCKS_WARN_ON(!key))
2978 return;
2979 /*
2980 * Sanity check, the lock-class key must be persistent:
2981 */
2982 if (!static_obj(key)) {
2983 printk("BUG: key %p not in .data!\n", key);
2984 /*
2985 * What it says above ^^^^^, I suggest you read it.
2986 */
2987 DEBUG_LOCKS_WARN_ON(1);
2988 return;
2989 }
2990 lock->key = key;
2991
2992 if (unlikely(!debug_locks))
2993 return;
2994
2995 if (subclass)
2996 register_lock_class(lock, subclass, 1);
2997}
2998EXPORT_SYMBOL_GPL(lockdep_init_map);
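/*
 * Usage sketch (hypothetical caller, illustrating the static-key rule
 * enforced above): code embedding a lockdep_map directly must pass a
 * key with static storage duration, even when the object itself is
 * heap-allocated:
 *
 *	static struct lock_class_key my_obj_key;
 *
 *	struct my_obj *obj = kmalloc(sizeof(*obj), GFP_KERNEL);
 *	lockdep_init_map(&obj->dep_map, "my_obj", &my_obj_key, 0);
 *
 * Passing a key that lives on the stack or heap trips the static_obj()
 * check and disables lockdep with the "not in .data" message above.
 */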
2999
3000struct lock_class_key __lockdep_no_validate__;
3001EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
3002
3003static int
3004print_lock_nested_lock_not_held(struct task_struct *curr,
3005 struct held_lock *hlock,
3006 unsigned long ip)
3007{
3008 if (!debug_locks_off())
3009 return 0;
3010 if (debug_locks_silent)
3011 return 0;
3012
3013 printk("\n");
3014 printk("==================================\n");
3015 printk("[ BUG: Nested lock was not taken ]\n");
3016 print_kernel_ident();
3017 printk("----------------------------------\n");
3018
3019 printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
3020 print_lock(hlock);
3021
3022 printk("\nbut this task is not holding:\n");
3023 printk("%s\n", hlock->nest_lock->name);
3024
3025 printk("\nstack backtrace:\n");
3026 dump_stack();
3027
3028 printk("\nother info that might help us debug this:\n");
3029 lockdep_print_held_locks(curr);
3030
3031 printk("\nstack backtrace:\n");
3032 dump_stack();
3033
3034 return 0;
3035}
3036
3037static int __lock_is_held(struct lockdep_map *lock);
3038
3039/*
3040 * This gets called for every mutex_lock*()/spin_lock*() operation.
3041 * We maintain the dependency maps and validate the locking attempt:
3042 */
3043static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3044 int trylock, int read, int check, int hardirqs_off,
3045 struct lockdep_map *nest_lock, unsigned long ip,
3046 int references)
3047{
3048 struct task_struct *curr = current;
3049 struct lock_class *class = NULL;
3050 struct held_lock *hlock;
3051 unsigned int depth, id;
3052 int chain_head = 0;
3053 int class_idx;
3054 u64 chain_key;
3055
3056 if (!prove_locking)
3057 check = 1;
3058
3059 if (unlikely(!debug_locks))
3060 return 0;
3061
3062 /*
3063 * Lockdep should run with IRQs disabled, otherwise we could
3064 * get an interrupt which would want to take locks, which would
3065 * end up in lockdep and have you got a head-ache already?
3066 */
3067 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
3068 return 0;
3069
3070 if (lock->key == &__lockdep_no_validate__)
3071 check = 1;
3072
3073 if (subclass < NR_LOCKDEP_CACHING_CLASSES)
3074 class = lock->class_cache[subclass];
3075 /*
3076 * Not cached?
3077 */
3078 if (unlikely(!class)) {
3079 class = register_lock_class(lock, subclass, 0);
3080 if (!class)
3081 return 0;
3082 }
3083 atomic_inc((atomic_t *)&class->ops);
3084 if (very_verbose(class)) {
3085 printk("\nacquire class [%p] %s", class->key, class->name);
3086 if (class->name_version > 1)
3087 printk("#%d", class->name_version);
3088 printk("\n");
3089 dump_stack();
3090 }
3091
3092 /*
3093 * Add the lock to the list of currently held locks.
3094 * (we dont increase the depth just yet, up until the
3095 * dependency checks are done)
3096 */
3097 depth = curr->lockdep_depth;
3098 /*
3099 * Ran out of static storage for our per-task lock stack again have we?
3100 */
3101 if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
3102 return 0;
3103
3104 class_idx = class - lock_classes + 1;
3105
3106 if (depth) {
3107 hlock = curr->held_locks + depth - 1;
3108 if (hlock->class_idx == class_idx && nest_lock) {
3109 if (hlock->references)
3110 hlock->references++;
3111 else
3112 hlock->references = 2;
3113
3114 return 1;
3115 }
3116 }
3117
3118 hlock = curr->held_locks + depth;
3119 /*
3120 * Plain impossible, we just registered it and checked it weren't no
3121 * NULL like.. I bet this mushroom I ate was good!
3122 */
3123 if (DEBUG_LOCKS_WARN_ON(!class))
3124 return 0;
3125 hlock->class_idx = class_idx;
3126 hlock->acquire_ip = ip;
3127 hlock->instance = lock;
3128 hlock->nest_lock = nest_lock;
3129 hlock->trylock = trylock;
3130 hlock->read = read;
3131 hlock->check = check;
3132 hlock->hardirqs_off = !!hardirqs_off;
3133 hlock->references = references;
3134#ifdef CONFIG_LOCK_STAT
3135 hlock->waittime_stamp = 0;
3136 hlock->holdtime_stamp = lockstat_clock();
3137#endif
3138
3139 if (check == 2 && !mark_irqflags(curr, hlock))
3140 return 0;
3141
3142 /* mark it as used: */
3143 if (!mark_lock(curr, hlock, LOCK_USED))
3144 return 0;
3145
3146 /*
3147 * Calculate the chain hash: it's the combined hash of all the
3148 * lock keys along the dependency chain. We save the hash value
3149 * at every step so that we can get the current hash easily
3150 * after unlock. The chain hash is then used to cache dependency
3151 * results.
3152 *
3153 * The 'key ID' is what is the most compact key value to drive
3154 * the hash, not class->key.
3155 */
3156 id = class - lock_classes;
3157 /*
3158 * Whoops, we did it again.. ran straight out of our static allocation.
3159 */
3160 if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
3161 return 0;
3162
3163 chain_key = curr->curr_chain_key;
3164 if (!depth) {
3165 /*
3166 * How can we have a chain hash when we ain't got no keys?!
3167 */
3168 if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
3169 return 0;
3170 chain_head = 1;
3171 }
3172
3173 hlock->prev_chain_key = chain_key;
3174 if (separate_irq_context(curr, hlock)) {
3175 chain_key = 0;
3176 chain_head = 1;
3177 }
3178 chain_key = iterate_chain_key(chain_key, id);
3179
3180 if (nest_lock && !__lock_is_held(nest_lock))
3181 return print_lock_nested_lock_not_held(curr, hlock, ip);
3182
3183 if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
3184 return 0;
3185
3186 curr->curr_chain_key = chain_key;
3187 curr->lockdep_depth++;
3188 check_chain_key(curr);
3189#ifdef CONFIG_DEBUG_LOCKDEP
3190 if (unlikely(!debug_locks))
3191 return 0;
3192#endif
3193 if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
3194 debug_locks_off();
3195 print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!");
3196 printk(KERN_DEBUG "depth: %i max: %lu!\n",
3197 curr->lockdep_depth, MAX_LOCK_DEPTH);
3198
3199 lockdep_print_held_locks(current);
3200 debug_show_all_locks();
3201 dump_stack();
3202
3203 return 0;
3204 }
3205
3206 if (unlikely(curr->lockdep_depth > max_lockdep_depth))
3207 max_lockdep_depth = curr->lockdep_depth;
3208
3209 return 1;
3210}
3211
3212static int
3213print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
3214 unsigned long ip)
3215{
3216 if (!debug_locks_off())
3217 return 0;
3218 if (debug_locks_silent)
3219 return 0;
3220
3221 printk("\n");
3222 printk("=====================================\n");
3223 printk("[ BUG: bad unlock balance detected! ]\n");
3224 print_kernel_ident();
3225 printk("-------------------------------------\n");
3226 printk("%s/%d is trying to release lock (",
3227 curr->comm, task_pid_nr(curr));
3228 print_lockdep_cache(lock);
3229 printk(") at:\n");
3230 print_ip_sym(ip);
3231 printk("but there are no more locks to release!\n");
3232 printk("\nother info that might help us debug this:\n");
3233 lockdep_print_held_locks(curr);
3234
3235 printk("\nstack backtrace:\n");
3236 dump_stack();
3237
3238 return 0;
3239}
3240
3241/*
3242 * Common debugging checks for both nested and non-nested unlock:
3243 */
3244static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
3245 unsigned long ip)
3246{
3247 if (unlikely(!debug_locks))
3248 return 0;
3249 /*
3250 * Lockdep should run with IRQs disabled, recursion, head-ache, etc..
3251 */
3252 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
3253 return 0;
3254
3255 if (curr->lockdep_depth <= 0)
3256 return print_unlock_imbalance_bug(curr, lock, ip);
3257
3258 return 1;
3259}
3260
3261static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
3262{
3263 if (hlock->instance == lock)
3264 return 1;
3265
3266 if (hlock->references) {
3267 struct lock_class *class = lock->class_cache[0];
3268
3269 if (!class)
3270 class = look_up_lock_class(lock, 0);
3271
3272 /*
3273 * If look_up_lock_class() failed to find a class, we're trying
3274 * to test if we hold a lock that has never yet been acquired.
3275 * Clearly if the lock hasn't been acquired _ever_, we're not
3276 * holding it either, so report failure.
3277 */
3278 if (!class)
3279 return 0;
3280
3281 /*
3282 * References, but not a lock we're actually ref-counting?
3283 * State got messed up, follow the sites that change ->references
3284 * and try to make sense of it.
3285 */
3286 if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
3287 return 0;
3288
3289 if (hlock->class_idx == class - lock_classes + 1)
3290 return 1;
3291 }
3292
3293 return 0;
3294}
3295
3296static int
3297__lock_set_class(struct lockdep_map *lock, const char *name,
3298 struct lock_class_key *key, unsigned int subclass,
3299 unsigned long ip)
3300{
3301 struct task_struct *curr = current;
3302 struct held_lock *hlock, *prev_hlock;
3303 struct lock_class *class;
3304 unsigned int depth;
3305 int i;
3306
3307 depth = curr->lockdep_depth;
3308 /*
3309 * This function is about (re)setting the class of a held lock,
3310 * yet we're not actually holding any locks. Naughty user!
3311 */
3312 if (DEBUG_LOCKS_WARN_ON(!depth))
3313 return 0;
3314
3315 prev_hlock = NULL;
3316 for (i = depth-1; i >= 0; i--) {
3317 hlock = curr->held_locks + i;
3318 /*
3319 * We must not cross into another context:
3320 */
3321 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
3322 break;
3323 if (match_held_lock(hlock, lock))
3324 goto found_it;
3325 prev_hlock = hlock;
3326 }
3327 return print_unlock_imbalance_bug(curr, lock, ip);
3328
3329found_it:
3330 lockdep_init_map(lock, name, key, 0);
3331 class = register_lock_class(lock, subclass, 0);
3332 hlock->class_idx = class - lock_classes + 1;
3333
3334 curr->lockdep_depth = i;
3335 curr->curr_chain_key = hlock->prev_chain_key;
3336
3337 for (; i < depth; i++) {
3338 hlock = curr->held_locks + i;
3339 if (!__lock_acquire(hlock->instance,
3340 hlock_class(hlock)->subclass, hlock->trylock,
3341 hlock->read, hlock->check, hlock->hardirqs_off,
3342 hlock->nest_lock, hlock->acquire_ip,
3343 hlock->references))
3344 return 0;
3345 }
3346
3347 /*
3348 * I took it apart and put it back together again, except now I have
3349 * these 'spare' parts.. where shall I put them.
3350 */
3351 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
3352 return 0;
3353 return 1;
3354}
3355
3356/*
 3357 * Remove the lock from the list of currently held locks in a
3358 * potentially non-nested (out of order) manner. This is a
3359 * relatively rare operation, as all the unlock APIs default
3360 * to nested mode (which uses lock_release()):
3361 */
3362static int
3363lock_release_non_nested(struct task_struct *curr,
3364 struct lockdep_map *lock, unsigned long ip)
3365{
3366 struct held_lock *hlock, *prev_hlock;
3367 unsigned int depth;
3368 int i;
3369
3370 /*
3371 * Check whether the lock exists in the current stack
3372 * of held locks:
3373 */
3374 depth = curr->lockdep_depth;
3375 /*
3376 * So we're all set to release this lock.. wait what lock? We don't
3377 * own any locks, you've been drinking again?
3378 */
3379 if (DEBUG_LOCKS_WARN_ON(!depth))
3380 return 0;
3381
3382 prev_hlock = NULL;
3383 for (i = depth-1; i >= 0; i--) {
3384 hlock = curr->held_locks + i;
3385 /*
3386 * We must not cross into another context:
3387 */
3388 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
3389 break;
3390 if (match_held_lock(hlock, lock))
3391 goto found_it;
3392 prev_hlock = hlock;
3393 }
3394 return print_unlock_imbalance_bug(curr, lock, ip);
3395
3396found_it:
3397 if (hlock->instance == lock)
3398 lock_release_holdtime(hlock);
3399
3400 if (hlock->references) {
3401 hlock->references--;
3402 if (hlock->references) {
3403 /*
3404 * We had, and after removing one, still have
3405 * references, the current lock stack is still
3406 * valid. We're done!
3407 */
3408 return 1;
3409 }
3410 }
3411
3412 /*
3413 * We have the right lock to unlock, 'hlock' points to it.
3414 * Now we remove it from the stack, and add back the other
3415 * entries (if any), recalculating the hash along the way:
3416 */
3417
3418 curr->lockdep_depth = i;
3419 curr->curr_chain_key = hlock->prev_chain_key;
3420
3421 for (i++; i < depth; i++) {
3422 hlock = curr->held_locks + i;
3423 if (!__lock_acquire(hlock->instance,
3424 hlock_class(hlock)->subclass, hlock->trylock,
3425 hlock->read, hlock->check, hlock->hardirqs_off,
3426 hlock->nest_lock, hlock->acquire_ip,
3427 hlock->references))
3428 return 0;
3429 }
3430
3431 /*
3432 * We had N bottles of beer on the wall, we drank one, but now
3433 * there's not N-1 bottles of beer left on the wall...
3434 */
3435 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
3436 return 0;
3437 return 1;
3438}
3439
3440/*
 3441 * Remove the lock from the list of currently held locks - this gets
3442 * called on mutex_unlock()/spin_unlock*() (or on a failed
3443 * mutex_lock_interruptible()). This is done for unlocks that nest
3444 * perfectly. (i.e. the current top of the lock-stack is unlocked)
3445 */
3446static int lock_release_nested(struct task_struct *curr,
3447 struct lockdep_map *lock, unsigned long ip)
3448{
3449 struct held_lock *hlock;
3450 unsigned int depth;
3451
3452 /*
3453 * Pop off the top of the lock stack:
3454 */
3455 depth = curr->lockdep_depth - 1;
3456 hlock = curr->held_locks + depth;
3457
3458 /*
3459 * Is the unlock non-nested:
3460 */
3461 if (hlock->instance != lock || hlock->references)
3462 return lock_release_non_nested(curr, lock, ip);
3463 curr->lockdep_depth--;
3464
3465 /*
3466 * No more locks, but somehow we've got hash left over, who left it?
3467 */
3468 if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
3469 return 0;
3470
3471 curr->curr_chain_key = hlock->prev_chain_key;
3472
3473 lock_release_holdtime(hlock);
3474
3475#ifdef CONFIG_DEBUG_LOCKDEP
3476 hlock->prev_chain_key = 0;
3477 hlock->class_idx = 0;
3478 hlock->acquire_ip = 0;
3479 hlock->irq_context = 0;
3480#endif
3481 return 1;
3482}
3483
3484/*
 3485 * Remove the lock from the list of currently held locks - this gets
 3486 * called on mutex_unlock()/spin_unlock*() (or on a failed
 3487 * mutex_lock_interruptible()). It handles both perfectly nested
 3488 * unlocks (the current top of the lock-stack) and non-nested ones.
3489 */
3490static void
3491__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
3492{
3493 struct task_struct *curr = current;
3494
3495 if (!check_unlock(curr, lock, ip))
3496 return;
3497
3498 if (nested) {
3499 if (!lock_release_nested(curr, lock, ip))
3500 return;
3501 } else {
3502 if (!lock_release_non_nested(curr, lock, ip))
3503 return;
3504 }
3505
3506 check_chain_key(curr);
3507}
3508
3509static int __lock_is_held(struct lockdep_map *lock)
3510{
3511 struct task_struct *curr = current;
3512 int i;
3513
3514 for (i = 0; i < curr->lockdep_depth; i++) {
3515 struct held_lock *hlock = curr->held_locks + i;
3516
3517 if (match_held_lock(hlock, lock))
3518 return 1;
3519 }
3520
3521 return 0;
3522}
3523
3524/*
3525 * Check whether we follow the irq-flags state precisely:
3526 */
3527static void check_flags(unsigned long flags)
3528{
3529#if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) && \
3530 defined(CONFIG_TRACE_IRQFLAGS)
3531 if (!debug_locks)
3532 return;
3533
3534 if (irqs_disabled_flags(flags)) {
3535 if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) {
3536 printk("possible reason: unannotated irqs-off.\n");
3537 }
3538 } else {
3539 if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) {
3540 printk("possible reason: unannotated irqs-on.\n");
3541 }
3542 }
3543
3544 /*
3545 * We dont accurately track softirq state in e.g.
3546 * hardirq contexts (such as on 4KSTACKS), so only
3547 * check if not in hardirq contexts:
3548 */
3549 if (!hardirq_count()) {
3550 if (softirq_count()) {
3551 /* like the above, but with softirqs */
3552 DEBUG_LOCKS_WARN_ON(current->softirqs_enabled);
3553 } else {
3554 /* lick the above, does it taste good? */
3555 DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
3556 }
3557 }
3558
3559 if (!debug_locks)
3560 print_irqtrace_events(current);
3561#endif
3562}
3563
3564void lock_set_class(struct lockdep_map *lock, const char *name,
3565 struct lock_class_key *key, unsigned int subclass,
3566 unsigned long ip)
3567{
3568 unsigned long flags;
3569
3570 if (unlikely(current->lockdep_recursion))
3571 return;
3572
3573 raw_local_irq_save(flags);
3574 current->lockdep_recursion = 1;
3575 check_flags(flags);
3576 if (__lock_set_class(lock, name, key, subclass, ip))
3577 check_chain_key(current);
3578 current->lockdep_recursion = 0;
3579 raw_local_irq_restore(flags);
3580}
3581EXPORT_SYMBOL_GPL(lock_set_class);
3582
3583/*
3584 * We are not always called with irqs disabled - do that here,
3585 * and also avoid lockdep recursion:
3586 */
3587void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3588 int trylock, int read, int check,
3589 struct lockdep_map *nest_lock, unsigned long ip)
3590{
3591 unsigned long flags;
3592
3593 if (unlikely(current->lockdep_recursion))
3594 return;
3595
3596 raw_local_irq_save(flags);
3597 check_flags(flags);
3598
3599 current->lockdep_recursion = 1;
3600 trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
3601 __lock_acquire(lock, subclass, trylock, read, check,
3602 irqs_disabled_flags(flags), nest_lock, ip, 0);
3603 current->lockdep_recursion = 0;
3604 raw_local_irq_restore(flags);
3605}
3606EXPORT_SYMBOL_GPL(lock_acquire);
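/*
 * Annotation sketch (parameter meanings taken from __lock_acquire()
 * above; the surrounding primitive is hypothetical): a plain
 * write-mode acquisition with full validation would be annotated as
 *
 *	lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, _RET_IP_);
 *	... critical section ...
 *	lock_release(&lock->dep_map, 1, _RET_IP_);
 *
 * where the arguments are subclass=0, trylock=0, read=0 (write mode;
 * 1 = reader, 2 = recursive reader, see check_deadlock()), check=2
 * (full validation including mark_irqflags()) and nest_lock=NULL;
 * nested=1 on release means the lock is expected to be the top of the
 * held-lock stack (see lock_release_nested()).
 */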
3607
3608void lock_release(struct lockdep_map *lock, int nested,
3609 unsigned long ip)
3610{
3611 unsigned long flags;
3612
3613 if (unlikely(current->lockdep_recursion))
3614 return;
3615
3616 raw_local_irq_save(flags);
3617 check_flags(flags);
3618 current->lockdep_recursion = 1;
3619 trace_lock_release(lock, ip);
3620 __lock_release(lock, nested, ip);
3621 current->lockdep_recursion = 0;
3622 raw_local_irq_restore(flags);
3623}
3624EXPORT_SYMBOL_GPL(lock_release);
3625
3626int lock_is_held(struct lockdep_map *lock)
3627{
3628 unsigned long flags;
3629 int ret = 0;
3630
3631 if (unlikely(current->lockdep_recursion))
3632 return 1; /* avoid false negative lockdep_assert_held() */
3633
3634 raw_local_irq_save(flags);
3635 check_flags(flags);
3636
3637 current->lockdep_recursion = 1;
3638 ret = __lock_is_held(lock);
3639 current->lockdep_recursion = 0;
3640 raw_local_irq_restore(flags);
3641
3642 return ret;
3643}
3644EXPORT_SYMBOL_GPL(lock_is_held);
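/*
 * Typical use (a sketch; lockdep_assert_held() is the usual wrapper,
 * as the recursion comment above hints, and my_update()/my_dev are
 * made-up names): functions that require a caller-held lock can
 * assert it instead of merely documenting it:
 *
 *	static void my_update(struct my_dev *dev)
 *	{
 *		lockdep_assert_held(&dev->lock);
 *		...
 *	}
 *
 * which boils down to warning when lock_is_held() on that lock's
 * dep_map returns 0 while lockdep is active.
 */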
3645
3646void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
3647{
3648 current->lockdep_reclaim_gfp = gfp_mask;
3649}
3650
3651void lockdep_clear_current_reclaim_state(void)
3652{
3653 current->lockdep_reclaim_gfp = 0;
3654}
3655
3656#ifdef CONFIG_LOCK_STAT
3657static int
3658print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
3659 unsigned long ip)
3660{
3661 if (!debug_locks_off())
3662 return 0;
3663 if (debug_locks_silent)
3664 return 0;
3665
3666 printk("\n");
3667 printk("=================================\n");
3668 printk("[ BUG: bad contention detected! ]\n");
3669 print_kernel_ident();
3670 printk("---------------------------------\n");
3671 printk("%s/%d is trying to contend lock (",
3672 curr->comm, task_pid_nr(curr));
3673 print_lockdep_cache(lock);
3674 printk(") at:\n");
3675 print_ip_sym(ip);
3676 printk("but there are no locks held!\n");
3677 printk("\nother info that might help us debug this:\n");
3678 lockdep_print_held_locks(curr);
3679
3680 printk("\nstack backtrace:\n");
3681 dump_stack();
3682
3683 return 0;
3684}
3685
3686static void
3687__lock_contended(struct lockdep_map *lock, unsigned long ip)
3688{
3689 struct task_struct *curr = current;
3690 struct held_lock *hlock, *prev_hlock;
3691 struct lock_class_stats *stats;
3692 unsigned int depth;
3693 int i, contention_point, contending_point;
3694
3695 depth = curr->lockdep_depth;
3696 /*
3697 * Whee, we contended on this lock, except it seems we're not
3698 * actually trying to acquire anything much at all..
3699 */
3700 if (DEBUG_LOCKS_WARN_ON(!depth))
3701 return;
3702
3703 prev_hlock = NULL;
3704 for (i = depth-1; i >= 0; i--) {
3705 hlock = curr->held_locks + i;
3706 /*
3707 * We must not cross into another context:
3708 */
3709 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
3710 break;
3711 if (match_held_lock(hlock, lock))
3712 goto found_it;
3713 prev_hlock = hlock;
3714 }
3715 print_lock_contention_bug(curr, lock, ip);
3716 return;
3717
3718found_it:
3719 if (hlock->instance != lock)
3720 return;
3721
3722 hlock->waittime_stamp = lockstat_clock();
3723
3724 contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
3725 contending_point = lock_point(hlock_class(hlock)->contending_point,
3726 lock->ip);
3727
3728 stats = get_lock_stats(hlock_class(hlock));
3729 if (contention_point < LOCKSTAT_POINTS)
3730 stats->contention_point[contention_point]++;
3731 if (contending_point < LOCKSTAT_POINTS)
3732 stats->contending_point[contending_point]++;
3733 if (lock->cpu != smp_processor_id())
3734 stats->bounces[bounce_contended + !!hlock->read]++;
3735 put_lock_stats(stats);
3736}
3737
3738static void
3739__lock_acquired(struct lockdep_map *lock, unsigned long ip)
3740{
3741 struct task_struct *curr = current;
3742 struct held_lock *hlock, *prev_hlock;
3743 struct lock_class_stats *stats;
3744 unsigned int depth;
3745 u64 now, waittime = 0;
3746 int i, cpu;
3747
3748 depth = curr->lockdep_depth;
3749 /*
3750 * Yay, we acquired ownership of this lock we didn't try to
3751 * acquire, how the heck did that happen?
3752 */
3753 if (DEBUG_LOCKS_WARN_ON(!depth))
3754 return;
3755
3756 prev_hlock = NULL;
3757 for (i = depth-1; i >= 0; i--) {
3758 hlock = curr->held_locks + i;
3759 /*
3760 * We must not cross into another context:
3761 */
3762 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
3763 break;
3764 if (match_held_lock(hlock, lock))
3765 goto found_it;
3766 prev_hlock = hlock;
3767 }
3768 print_lock_contention_bug(curr, lock, _RET_IP_);
3769 return;
3770
3771found_it:
3772 if (hlock->instance != lock)
3773 return;
3774
3775 cpu = smp_processor_id();
3776 if (hlock->waittime_stamp) {
3777 now = lockstat_clock();
3778 waittime = now - hlock->waittime_stamp;
3779 hlock->holdtime_stamp = now;
3780 }
3781
3782 trace_lock_acquired(lock, ip);
3783
3784 stats = get_lock_stats(hlock_class(hlock));
3785 if (waittime) {
3786 if (hlock->read)
3787 lock_time_inc(&stats->read_waittime, waittime);
3788 else
3789 lock_time_inc(&stats->write_waittime, waittime);
3790 }
3791 if (lock->cpu != cpu)
3792 stats->bounces[bounce_acquired + !!hlock->read]++;
3793 put_lock_stats(stats);
3794
3795 lock->cpu = cpu;
3796 lock->ip = ip;
3797}
3798
3799void lock_contended(struct lockdep_map *lock, unsigned long ip)
3800{
3801 unsigned long flags;
3802
3803 if (unlikely(!lock_stat))
3804 return;
3805
3806 if (unlikely(current->lockdep_recursion))
3807 return;
3808
3809 raw_local_irq_save(flags);
3810 check_flags(flags);
3811 current->lockdep_recursion = 1;
3812 trace_lock_contended(lock, ip);
3813 __lock_contended(lock, ip);
3814 current->lockdep_recursion = 0;
3815 raw_local_irq_restore(flags);
3816}
3817EXPORT_SYMBOL_GPL(lock_contended);
3818
3819void lock_acquired(struct lockdep_map *lock, unsigned long ip)
3820{
3821 unsigned long flags;
3822
3823 if (unlikely(!lock_stat))
3824 return;
3825
3826 if (unlikely(current->lockdep_recursion))
3827 return;
3828
3829 raw_local_irq_save(flags);
3830 check_flags(flags);
3831 current->lockdep_recursion = 1;
3832 __lock_acquired(lock, ip);
3833 current->lockdep_recursion = 0;
3834 raw_local_irq_restore(flags);
3835}
3836EXPORT_SYMBOL_GPL(lock_acquired);
3837#endif
3838
3839/*
3840 * Used by the testsuite, sanitize the validator state
3841 * after a simulated failure:
3842 */
3843
3844void lockdep_reset(void)
3845{
3846 unsigned long flags;
3847 int i;
3848
3849 raw_local_irq_save(flags);
3850 current->curr_chain_key = 0;
3851 current->lockdep_depth = 0;
3852 current->lockdep_recursion = 0;
3853 memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock));
3854 nr_hardirq_chains = 0;
3855 nr_softirq_chains = 0;
3856 nr_process_chains = 0;
3857 debug_locks = 1;
3858 for (i = 0; i < CHAINHASH_SIZE; i++)
3859 INIT_LIST_HEAD(chainhash_table + i);
3860 raw_local_irq_restore(flags);
3861}
3862
3863static void zap_class(struct lock_class *class)
3864{
3865 int i;
3866
3867 /*
3868 * Remove all dependencies this lock is
3869 * involved in:
3870 */
3871 for (i = 0; i < nr_list_entries; i++) {
3872 if (list_entries[i].class == class)
3873 list_del_rcu(&list_entries[i].entry);
3874 }
3875 /*
3876 * Unhash the class and remove it from the all_lock_classes list:
3877 */
3878 list_del_rcu(&class->hash_entry);
3879 list_del_rcu(&class->lock_entry);
3880
3881 class->key = NULL;
3882}
3883
3884static inline int within(const void *addr, void *start, unsigned long size)
3885{
3886 return addr >= start && addr < start + size;
3887}
3888
3889void lockdep_free_key_range(void *start, unsigned long size)
3890{
3891 struct lock_class *class, *next;
3892 struct list_head *head;
3893 unsigned long flags;
3894 int i;
3895 int locked;
3896
3897 raw_local_irq_save(flags);
3898 locked = graph_lock();
3899
3900 /*
3901 * Unhash all classes that were created by this module:
3902 */
3903 for (i = 0; i < CLASSHASH_SIZE; i++) {
3904 head = classhash_table + i;
3905 if (list_empty(head))
3906 continue;
3907 list_for_each_entry_safe(class, next, head, hash_entry) {
3908 if (within(class->key, start, size))
3909 zap_class(class);
3910 else if (within(class->name, start, size))
3911 zap_class(class);
3912 }
3913 }
3914
3915 if (locked)
3916 graph_unlock();
3917 raw_local_irq_restore(flags);
3918}
3919
3920void lockdep_reset_lock(struct lockdep_map *lock)
3921{
3922 struct lock_class *class, *next;
3923 struct list_head *head;
3924 unsigned long flags;
3925 int i, j;
3926 int locked;
3927
3928 raw_local_irq_save(flags);
3929
3930 /*
3931 * Remove all classes this lock might have:
3932 */
3933 for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
3934 /*
3935 * If the class exists we look it up and zap it:
3936 */
3937 class = look_up_lock_class(lock, j);
3938 if (class)
3939 zap_class(class);
3940 }
3941 /*
3942 * Debug check: in the end all mapped classes should
3943 * be gone.
3944 */
3945 locked = graph_lock();
3946 for (i = 0; i < CLASSHASH_SIZE; i++) {
3947 head = classhash_table + i;
3948 if (list_empty(head))
3949 continue;
3950 list_for_each_entry_safe(class, next, head, hash_entry) {
3951 int match = 0;
3952
3953 for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
3954 match |= class == lock->class_cache[j];
3955
3956 if (unlikely(match)) {
3957 if (debug_locks_off_graph_unlock()) {
3958 /*
3959					 * We just zapped all the classes for this lock, how did one still match?
3960 */
3961 WARN_ON(1);
3962 }
3963 goto out_restore;
3964 }
3965 }
3966 }
3967 if (locked)
3968 graph_unlock();
3969
3970out_restore:
3971 raw_local_irq_restore(flags);
3972}
3973
3974void lockdep_init(void)
3975{
3976 int i;
3977
3978 /*
3979 * Some architectures have their own start_kernel()
3980 * code which calls lockdep_init(), while we also
3981	 * call lockdep_init() from start_kernel() itself,
3982 * and we want to initialize the hashes only once:
3983 */
3984 if (lockdep_initialized)
3985 return;
3986
3987 for (i = 0; i < CLASSHASH_SIZE; i++)
3988 INIT_LIST_HEAD(classhash_table + i);
3989
3990 for (i = 0; i < CHAINHASH_SIZE; i++)
3991 INIT_LIST_HEAD(chainhash_table + i);
3992
3993 lockdep_initialized = 1;
3994}
3995
3996void __init lockdep_info(void)
3997{
3998 printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
3999
4000 printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
4001 printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH);
4002 printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS);
4003 printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
4004 printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES);
4005 printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS);
4006 printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE);
4007
4008 printk(" memory used by lock dependency info: %lu kB\n",
4009 (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
4010 sizeof(struct list_head) * CLASSHASH_SIZE +
4011 sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
4012 sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
4013 sizeof(struct list_head) * CHAINHASH_SIZE
4014#ifdef CONFIG_PROVE_LOCKING
4015 + sizeof(struct circular_queue)
4016#endif
4017 ) / 1024
4018 );
4019
4020 printk(" per task-struct memory footprint: %lu bytes\n",
4021 sizeof(struct held_lock) * MAX_LOCK_DEPTH);
4022
4023#ifdef CONFIG_DEBUG_LOCKDEP
4024 if (lockdep_init_error) {
4025		printk("WARNING: lockdep init error! lock-%s was acquired "
4026				"before lockdep_init\n", lock_init_error);
4027 printk("Call stack leading to lockdep invocation was:\n");
4028 print_stack_trace(&lockdep_init_trace, 0);
4029 }
4030#endif
4031}
4032
4033static void
4034print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
4035 const void *mem_to, struct held_lock *hlock)
4036{
4037 if (!debug_locks_off())
4038 return;
4039 if (debug_locks_silent)
4040 return;
4041
4042 printk("\n");
4043 printk("=========================\n");
4044 printk("[ BUG: held lock freed! ]\n");
4045 print_kernel_ident();
4046 printk("-------------------------\n");
4047 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
4048 curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
4049 print_lock(hlock);
4050 lockdep_print_held_locks(curr);
4051
4052 printk("\nstack backtrace:\n");
4053 dump_stack();
4054}
4055
4056static inline int not_in_range(const void* mem_from, unsigned long mem_len,
4057 const void* lock_from, unsigned long lock_len)
4058{
4059 return lock_from + lock_len <= mem_from ||
4060 mem_from + mem_len <= lock_from;
4061}
4062
4063/*
4064 * Called when kernel memory is freed (or unmapped), or if a lock
4065 * is destroyed or reinitialized - this code checks whether there is
4066 * any held lock in the memory range of <from> to <to>:
4067 */
4068void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
4069{
4070 struct task_struct *curr = current;
4071 struct held_lock *hlock;
4072 unsigned long flags;
4073 int i;
4074
4075 if (unlikely(!debug_locks))
4076 return;
4077
4078 local_irq_save(flags);
4079 for (i = 0; i < curr->lockdep_depth; i++) {
4080 hlock = curr->held_locks + i;
4081
4082 if (not_in_range(mem_from, mem_len, hlock->instance,
4083 sizeof(*hlock->instance)))
4084 continue;
4085
4086 print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock);
4087 break;
4088 }
4089 local_irq_restore(flags);
4090}
4091EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
4092
4093static void print_held_locks_bug(void)
4094{
4095 if (!debug_locks_off())
4096 return;
4097 if (debug_locks_silent)
4098 return;
4099
4100 printk("\n");
4101 printk("=====================================\n");
4102 printk("[ BUG: %s/%d still has locks held! ]\n",
4103 current->comm, task_pid_nr(current));
4104 print_kernel_ident();
4105 printk("-------------------------------------\n");
4106 lockdep_print_held_locks(current);
4107 printk("\nstack backtrace:\n");
4108 dump_stack();
4109}
4110
4111void debug_check_no_locks_held(void)
4112{
4113 if (unlikely(current->lockdep_depth > 0))
4114 print_held_locks_bug();
4115}
4116EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
4117
4118void debug_show_all_locks(void)
4119{
4120 struct task_struct *g, *p;
4121 int count = 10;
4122 int unlock = 1;
4123
4124 if (unlikely(!debug_locks)) {
4125 printk("INFO: lockdep is turned off.\n");
4126 return;
4127 }
4128 printk("\nShowing all locks held in the system:\n");
4129
4130 /*
4131	 * Here we try to take the tasklist_lock as hard as possible; if we
4132	 * are not successful after 2 seconds we ignore it (but keep
4133 * trying). This is to enable a debug printout even if a
4134 * tasklist_lock-holding task deadlocks or crashes.
4135 */
4136retry:
4137 if (!read_trylock(&tasklist_lock)) {
4138 if (count == 10)
4139 printk("hm, tasklist_lock locked, retrying... ");
4140 if (count) {
4141 count--;
4142 printk(" #%d", 10-count);
4143 mdelay(200);
4144 goto retry;
4145 }
4146 printk(" ignoring it.\n");
4147 unlock = 0;
4148 } else {
4149 if (count != 10)
4150 printk(KERN_CONT " locked it.\n");
4151 }
4152
4153 do_each_thread(g, p) {
4154 /*
4155 * It's not reliable to print a task's held locks
4156 * if it's not sleeping (or if it's not the current
4157 * task):
4158 */
4159 if (p->state == TASK_RUNNING && p != current)
4160 continue;
4161 if (p->lockdep_depth)
4162 lockdep_print_held_locks(p);
4163 if (!unlock)
4164 if (read_trylock(&tasklist_lock))
4165 unlock = 1;
4166 } while_each_thread(g, p);
4167
4168 printk("\n");
4169 printk("=============================================\n\n");
4170
4171 if (unlock)
4172 read_unlock(&tasklist_lock);
4173}
4174EXPORT_SYMBOL_GPL(debug_show_all_locks);
4175
4176/*
4177 * Careful: only use this function if you are sure that
4178 * the task cannot run in parallel!
4179 */
4180void debug_show_held_locks(struct task_struct *task)
4181{
4182 if (unlikely(!debug_locks)) {
4183 printk("INFO: lockdep is turned off.\n");
4184 return;
4185 }
4186 lockdep_print_held_locks(task);
4187}
4188EXPORT_SYMBOL_GPL(debug_show_held_locks);
4189
4190void lockdep_sys_exit(void)
4191{
4192 struct task_struct *curr = current;
4193
4194 if (unlikely(curr->lockdep_depth)) {
4195 if (!debug_locks_off())
4196 return;
4197 printk("\n");
4198 printk("================================================\n");
4199 printk("[ BUG: lock held when returning to user space! ]\n");
4200 print_kernel_ident();
4201 printk("------------------------------------------------\n");
4202 printk("%s/%d is leaving the kernel with locks still held!\n",
4203 curr->comm, curr->pid);
4204 lockdep_print_held_locks(curr);
4205 }
4206}
4207
4208void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
4209{
4210 struct task_struct *curr = current;
4211
4212#ifndef CONFIG_PROVE_RCU_REPEATEDLY
4213 if (!debug_locks_off())
4214 return;
4215#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
4216 /* Note: the following can be executed concurrently, so be careful. */
4217 printk("\n");
4218 printk("===============================\n");
4219 printk("[ INFO: suspicious RCU usage. ]\n");
4220 print_kernel_ident();
4221 printk("-------------------------------\n");
4222 printk("%s:%d %s!\n", file, line, s);
4223 printk("\nother info that might help us debug this:\n\n");
4224 printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
4225 !rcu_lockdep_current_cpu_online()
4226 ? "RCU used illegally from offline CPU!\n"
4227 : !rcu_is_watching()
4228 ? "RCU used illegally from idle CPU!\n"
4229 : "",
4230 rcu_scheduler_active, debug_locks);
4231
4232 /*
4233	 * If a CPU is in the RCU-free window in idle (i.e. in the section
4234	 * between rcu_idle_enter() and rcu_idle_exit()), then RCU
4235 * considers that CPU to be in an "extended quiescent state",
4236 * which means that RCU will be completely ignoring that CPU.
4237 * Therefore, rcu_read_lock() and friends have absolutely no
4238 * effect on a CPU running in that state. In other words, even if
4239 * such an RCU-idle CPU has called rcu_read_lock(), RCU might well
4240 * delete data structures out from under it. RCU really has no
4241 * choice here: we need to keep an RCU-free window in idle where
4242	 * the CPU may possibly enter a low power mode. This way, other CPUs
4243	 * that started a grace period can observe our extended quiescent
4244	 * state instead of having that grace period delayed for as long as
4245	 * we run in the idle task.
4246 *
4247 * So complain bitterly if someone does call rcu_read_lock(),
4248 * rcu_read_lock_bh() and so on from extended quiescent states.
4249 */
4250 if (!rcu_is_watching())
4251 printk("RCU used illegally from extended quiescent state!\n");
4252
4253 lockdep_print_held_locks(curr);
4254 printk("\nstack backtrace:\n");
4255 dump_stack();
4256}
4257EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
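As an illustration of what debug_check_no_locks_freed() above catches, here is a minimal, hypothetical sketch of the bug pattern: freeing an object while a lock embedded in it is still held. The struct and function are invented, and it assumes a lockdep-enabled kernel whose slab free path invokes debug_check_no_locks_freed().

	/* Hypothetical example of the bug reported as "[ BUG: held lock freed! ]". */
	#include <linux/mutex.h>
	#include <linux/slab.h>

	struct foo {
		struct mutex lock;
		int refs;
	};

	static void broken_teardown(struct foo *f)
	{
		mutex_lock(&f->lock);
		/*
		 * f->lock is still held while the object is freed: the slab
		 * free path runs debug_check_no_locks_freed() over the freed
		 * range, finds the held lock and prints the splat above.
		 */
		kfree(f);
	}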
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
new file mode 100644
index 000000000000..4f560cfedc8f
--- /dev/null
+++ b/kernel/locking/lockdep_internals.h
@@ -0,0 +1,170 @@
1/*
2 * kernel/locking/lockdep_internals.h
3 *
4 * Runtime locking correctness validator
5 *
6 * lockdep subsystem internal functions and variables.
7 */
8
9/*
10 * Lock-class usage-state bits:
11 */
12enum lock_usage_bit {
13#define LOCKDEP_STATE(__STATE) \
14 LOCK_USED_IN_##__STATE, \
15 LOCK_USED_IN_##__STATE##_READ, \
16 LOCK_ENABLED_##__STATE, \
17 LOCK_ENABLED_##__STATE##_READ,
18#include "lockdep_states.h"
19#undef LOCKDEP_STATE
20 LOCK_USED,
21 LOCK_USAGE_STATES
22};
23
24/*
25 * Usage-state bitmasks:
26 */
27#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE),
28
29enum {
30#define LOCKDEP_STATE(__STATE) \
31 __LOCKF(USED_IN_##__STATE) \
32 __LOCKF(USED_IN_##__STATE##_READ) \
33 __LOCKF(ENABLED_##__STATE) \
34 __LOCKF(ENABLED_##__STATE##_READ)
35#include "lockdep_states.h"
36#undef LOCKDEP_STATE
37 __LOCKF(USED)
38};
39
40#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
41#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
42
43#define LOCKF_ENABLED_IRQ_READ \
44 (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
45#define LOCKF_USED_IN_IRQ_READ \
46 (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
47
48/*
49 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
50 * we track.
51 *
52 * We use the per-lock dependency maps in two ways: we grow them by adding
53 * every to-be-taken lock to each currently held lock's own dependency
54 * table (if it's not there yet), and we check them for lock order
55 * conflicts and deadlocks.
56 */
57#define MAX_LOCKDEP_ENTRIES 16384UL
58
59#define MAX_LOCKDEP_CHAINS_BITS 15
60#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
61
62#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
63
64/*
65 * Stack-trace: tightly packed array of stack backtrace
66 * addresses. Protected by the hash_lock.
67 */
68#define MAX_STACK_TRACE_ENTRIES 262144UL
69
70extern struct list_head all_lock_classes;
71extern struct lock_chain lock_chains[];
72
73#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2)
74
75extern void get_usage_chars(struct lock_class *class,
76 char usage[LOCK_USAGE_CHARS]);
77
78extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
79
80struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i);
81
82extern unsigned long nr_lock_classes;
83extern unsigned long nr_list_entries;
84extern unsigned long nr_lock_chains;
85extern int nr_chain_hlocks;
86extern unsigned long nr_stack_trace_entries;
87
88extern unsigned int nr_hardirq_chains;
89extern unsigned int nr_softirq_chains;
90extern unsigned int nr_process_chains;
91extern unsigned int max_lockdep_depth;
92extern unsigned int max_recursion_depth;
93
94extern unsigned int max_bfs_queue_depth;
95
96#ifdef CONFIG_PROVE_LOCKING
97extern unsigned long lockdep_count_forward_deps(struct lock_class *);
98extern unsigned long lockdep_count_backward_deps(struct lock_class *);
99#else
100static inline unsigned long
101lockdep_count_forward_deps(struct lock_class *class)
102{
103 return 0;
104}
105static inline unsigned long
106lockdep_count_backward_deps(struct lock_class *class)
107{
108 return 0;
109}
110#endif
111
112#ifdef CONFIG_DEBUG_LOCKDEP
113
114#include <asm/local.h>
115/*
116 * Various lockdep statistics.
117 * We want them per-CPU as they are often accessed in the fast path
118 * and we want to avoid too much cache bouncing.
119 */
120struct lockdep_stats {
121 int chain_lookup_hits;
122 int chain_lookup_misses;
123 int hardirqs_on_events;
124 int hardirqs_off_events;
125 int redundant_hardirqs_on;
126 int redundant_hardirqs_off;
127 int softirqs_on_events;
128 int softirqs_off_events;
129 int redundant_softirqs_on;
130 int redundant_softirqs_off;
131 int nr_unused_locks;
132 int nr_cyclic_checks;
133 int nr_cyclic_check_recursions;
134 int nr_find_usage_forwards_checks;
135 int nr_find_usage_forwards_recursions;
136 int nr_find_usage_backwards_checks;
137 int nr_find_usage_backwards_recursions;
138};
139
140DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
141
142#define __debug_atomic_inc(ptr) \
143 this_cpu_inc(lockdep_stats.ptr);
144
145#define debug_atomic_inc(ptr) { \
146 WARN_ON_ONCE(!irqs_disabled()); \
147 __this_cpu_inc(lockdep_stats.ptr); \
148}
149
150#define debug_atomic_dec(ptr) { \
151 WARN_ON_ONCE(!irqs_disabled()); \
152 __this_cpu_dec(lockdep_stats.ptr); \
153}
154
155#define debug_atomic_read(ptr) ({ \
156 struct lockdep_stats *__cpu_lockdep_stats; \
157 unsigned long long __total = 0; \
158 int __cpu; \
159 for_each_possible_cpu(__cpu) { \
160 __cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu); \
161 __total += __cpu_lockdep_stats->ptr; \
162 } \
163 __total; \
164})
165#else
166# define __debug_atomic_inc(ptr) do { } while (0)
167# define debug_atomic_inc(ptr) do { } while (0)
168# define debug_atomic_dec(ptr) do { } while (0)
169# define debug_atomic_read(ptr) 0
170#endif
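The per-cpu statistics above trade an exact global counter for the absence of cross-CPU cache-line bouncing. A short illustration of how the macros combine, using the chain_lookup_hits field declared above (the call site is assumed; only the macro semantics come from this header):

	/*
	 * Illustration, assuming CONFIG_DEBUG_LOCKDEP: a hot-path site does
	 *
	 *	debug_atomic_inc(chain_lookup_hits);
	 *
	 * which bumps only this CPU's lockdep_stats.chain_lookup_hits (IRQs
	 * must already be off, hence the WARN_ON_ONCE). The reporting side,
	 * e.g. /proc/lockdep_stats, then uses
	 *
	 *	unsigned long long hits = debug_atomic_read(chain_lookup_hits);
	 *
	 * which walks every possible CPU and sums the per-cpu copies, so
	 * counts of 3 on CPU0 and 5 on CPU1 read back as 8. Without
	 * CONFIG_DEBUG_LOCKDEP all of these compile away (debug_atomic_read()
	 * becomes a constant 0).
	 */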
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
new file mode 100644
index 000000000000..ef43ac4bafb5
--- /dev/null
+++ b/kernel/locking/lockdep_proc.c
@@ -0,0 +1,683 @@
1/*
2 * kernel/locking/lockdep_proc.c
3 *
4 * Runtime locking correctness validator
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
10 *
11 * Code for /proc/lockdep and /proc/lockdep_stats:
12 *
13 */
14#include <linux/export.h>
15#include <linux/proc_fs.h>
16#include <linux/seq_file.h>
17#include <linux/kallsyms.h>
18#include <linux/debug_locks.h>
19#include <linux/vmalloc.h>
20#include <linux/sort.h>
21#include <asm/uaccess.h>
22#include <asm/div64.h>
23
24#include "lockdep_internals.h"
25
26static void *l_next(struct seq_file *m, void *v, loff_t *pos)
27{
28 return seq_list_next(v, &all_lock_classes, pos);
29}
30
31static void *l_start(struct seq_file *m, loff_t *pos)
32{
33 return seq_list_start_head(&all_lock_classes, *pos);
34}
35
36static void l_stop(struct seq_file *m, void *v)
37{
38}
39
40static void print_name(struct seq_file *m, struct lock_class *class)
41{
42 char str[KSYM_NAME_LEN];
43 const char *name = class->name;
44
45 if (!name) {
46 name = __get_key_name(class->key, str);
47 seq_printf(m, "%s", name);
48	} else {
49 seq_printf(m, "%s", name);
50 if (class->name_version > 1)
51 seq_printf(m, "#%d", class->name_version);
52 if (class->subclass)
53 seq_printf(m, "/%d", class->subclass);
54 }
55}
56
57static int l_show(struct seq_file *m, void *v)
58{
59 struct lock_class *class = list_entry(v, struct lock_class, lock_entry);
60 struct lock_list *entry;
61 char usage[LOCK_USAGE_CHARS];
62
63 if (v == &all_lock_classes) {
64 seq_printf(m, "all lock classes:\n");
65 return 0;
66 }
67
68 seq_printf(m, "%p", class->key);
69#ifdef CONFIG_DEBUG_LOCKDEP
70 seq_printf(m, " OPS:%8ld", class->ops);
71#endif
72#ifdef CONFIG_PROVE_LOCKING
73 seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class));
74 seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class));
75#endif
76
77 get_usage_chars(class, usage);
78 seq_printf(m, " %s", usage);
79
80 seq_printf(m, ": ");
81 print_name(m, class);
82 seq_puts(m, "\n");
83
84 list_for_each_entry(entry, &class->locks_after, entry) {
85 if (entry->distance == 1) {
86 seq_printf(m, " -> [%p] ", entry->class->key);
87 print_name(m, entry->class);
88 seq_puts(m, "\n");
89 }
90 }
91 seq_puts(m, "\n");
92
93 return 0;
94}
95
96static const struct seq_operations lockdep_ops = {
97 .start = l_start,
98 .next = l_next,
99 .stop = l_stop,
100 .show = l_show,
101};
102
103static int lockdep_open(struct inode *inode, struct file *file)
104{
105 return seq_open(file, &lockdep_ops);
106}
107
108static const struct file_operations proc_lockdep_operations = {
109 .open = lockdep_open,
110 .read = seq_read,
111 .llseek = seq_lseek,
112 .release = seq_release,
113};
114
115#ifdef CONFIG_PROVE_LOCKING
116static void *lc_start(struct seq_file *m, loff_t *pos)
117{
118 if (*pos == 0)
119 return SEQ_START_TOKEN;
120
121 if (*pos - 1 < nr_lock_chains)
122 return lock_chains + (*pos - 1);
123
124 return NULL;
125}
126
127static void *lc_next(struct seq_file *m, void *v, loff_t *pos)
128{
129 (*pos)++;
130 return lc_start(m, pos);
131}
132
133static void lc_stop(struct seq_file *m, void *v)
134{
135}
136
137static int lc_show(struct seq_file *m, void *v)
138{
139 struct lock_chain *chain = v;
140 struct lock_class *class;
141 int i;
142
143 if (v == SEQ_START_TOKEN) {
144 seq_printf(m, "all lock chains:\n");
145 return 0;
146 }
147
148 seq_printf(m, "irq_context: %d\n", chain->irq_context);
149
150 for (i = 0; i < chain->depth; i++) {
151 class = lock_chain_get_class(chain, i);
152 if (!class->key)
153 continue;
154
155 seq_printf(m, "[%p] ", class->key);
156 print_name(m, class);
157 seq_puts(m, "\n");
158 }
159 seq_puts(m, "\n");
160
161 return 0;
162}
163
164static const struct seq_operations lockdep_chains_ops = {
165 .start = lc_start,
166 .next = lc_next,
167 .stop = lc_stop,
168 .show = lc_show,
169};
170
171static int lockdep_chains_open(struct inode *inode, struct file *file)
172{
173 return seq_open(file, &lockdep_chains_ops);
174}
175
176static const struct file_operations proc_lockdep_chains_operations = {
177 .open = lockdep_chains_open,
178 .read = seq_read,
179 .llseek = seq_lseek,
180 .release = seq_release,
181};
182#endif /* CONFIG_PROVE_LOCKING */
183
184static void lockdep_stats_debug_show(struct seq_file *m)
185{
186#ifdef CONFIG_DEBUG_LOCKDEP
187 unsigned long long hi1 = debug_atomic_read(hardirqs_on_events),
188 hi2 = debug_atomic_read(hardirqs_off_events),
189 hr1 = debug_atomic_read(redundant_hardirqs_on),
190 hr2 = debug_atomic_read(redundant_hardirqs_off),
191 si1 = debug_atomic_read(softirqs_on_events),
192 si2 = debug_atomic_read(softirqs_off_events),
193 sr1 = debug_atomic_read(redundant_softirqs_on),
194 sr2 = debug_atomic_read(redundant_softirqs_off);
195
196 seq_printf(m, " chain lookup misses: %11llu\n",
197 debug_atomic_read(chain_lookup_misses));
198 seq_printf(m, " chain lookup hits: %11llu\n",
199 debug_atomic_read(chain_lookup_hits));
200 seq_printf(m, " cyclic checks: %11llu\n",
201 debug_atomic_read(nr_cyclic_checks));
202 seq_printf(m, " find-mask forwards checks: %11llu\n",
203 debug_atomic_read(nr_find_usage_forwards_checks));
204 seq_printf(m, " find-mask backwards checks: %11llu\n",
205 debug_atomic_read(nr_find_usage_backwards_checks));
206
207 seq_printf(m, " hardirq on events: %11llu\n", hi1);
208 seq_printf(m, " hardirq off events: %11llu\n", hi2);
209 seq_printf(m, " redundant hardirq ons: %11llu\n", hr1);
210 seq_printf(m, " redundant hardirq offs: %11llu\n", hr2);
211 seq_printf(m, " softirq on events: %11llu\n", si1);
212 seq_printf(m, " softirq off events: %11llu\n", si2);
213 seq_printf(m, " redundant softirq ons: %11llu\n", sr1);
214 seq_printf(m, " redundant softirq offs: %11llu\n", sr2);
215#endif
216}
217
218static int lockdep_stats_show(struct seq_file *m, void *v)
219{
220 struct lock_class *class;
221 unsigned long nr_unused = 0, nr_uncategorized = 0,
222 nr_irq_safe = 0, nr_irq_unsafe = 0,
223 nr_softirq_safe = 0, nr_softirq_unsafe = 0,
224 nr_hardirq_safe = 0, nr_hardirq_unsafe = 0,
225 nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
226 nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
227 nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
228 sum_forward_deps = 0;
229
230 list_for_each_entry(class, &all_lock_classes, lock_entry) {
231
232 if (class->usage_mask == 0)
233 nr_unused++;
234 if (class->usage_mask == LOCKF_USED)
235 nr_uncategorized++;
236 if (class->usage_mask & LOCKF_USED_IN_IRQ)
237 nr_irq_safe++;
238 if (class->usage_mask & LOCKF_ENABLED_IRQ)
239 nr_irq_unsafe++;
240 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
241 nr_softirq_safe++;
242 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ)
243 nr_softirq_unsafe++;
244 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
245 nr_hardirq_safe++;
246 if (class->usage_mask & LOCKF_ENABLED_HARDIRQ)
247 nr_hardirq_unsafe++;
248 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
249 nr_irq_read_safe++;
250 if (class->usage_mask & LOCKF_ENABLED_IRQ_READ)
251 nr_irq_read_unsafe++;
252 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
253 nr_softirq_read_safe++;
254 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ)
255 nr_softirq_read_unsafe++;
256 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
257 nr_hardirq_read_safe++;
258 if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ)
259 nr_hardirq_read_unsafe++;
260
261#ifdef CONFIG_PROVE_LOCKING
262 sum_forward_deps += lockdep_count_forward_deps(class);
263#endif
264 }
265#ifdef CONFIG_DEBUG_LOCKDEP
266 DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
267#endif
268 seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
269 nr_lock_classes, MAX_LOCKDEP_KEYS);
270 seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
271 nr_list_entries, MAX_LOCKDEP_ENTRIES);
272 seq_printf(m, " indirect dependencies: %11lu\n",
273 sum_forward_deps);
274
275 /*
276 * Total number of dependencies:
277 *
278 * All irq-safe locks may nest inside irq-unsafe locks,
279 * plus all the other known dependencies:
280 */
281 seq_printf(m, " all direct dependencies: %11lu\n",
282 nr_irq_unsafe * nr_irq_safe +
283 nr_hardirq_unsafe * nr_hardirq_safe +
284 nr_list_entries);
285
286#ifdef CONFIG_PROVE_LOCKING
287 seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
288 nr_lock_chains, MAX_LOCKDEP_CHAINS);
289 seq_printf(m, " dependency chain hlocks: %11d [max: %lu]\n",
290 nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS);
291#endif
292
293#ifdef CONFIG_TRACE_IRQFLAGS
294 seq_printf(m, " in-hardirq chains: %11u\n",
295 nr_hardirq_chains);
296 seq_printf(m, " in-softirq chains: %11u\n",
297 nr_softirq_chains);
298#endif
299 seq_printf(m, " in-process chains: %11u\n",
300 nr_process_chains);
301 seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n",
302 nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES);
303 seq_printf(m, " combined max dependencies: %11u\n",
304 (nr_hardirq_chains + 1) *
305 (nr_softirq_chains + 1) *
306 (nr_process_chains + 1)
307 );
308 seq_printf(m, " hardirq-safe locks: %11lu\n",
309 nr_hardirq_safe);
310 seq_printf(m, " hardirq-unsafe locks: %11lu\n",
311 nr_hardirq_unsafe);
312 seq_printf(m, " softirq-safe locks: %11lu\n",
313 nr_softirq_safe);
314 seq_printf(m, " softirq-unsafe locks: %11lu\n",
315 nr_softirq_unsafe);
316 seq_printf(m, " irq-safe locks: %11lu\n",
317 nr_irq_safe);
318 seq_printf(m, " irq-unsafe locks: %11lu\n",
319 nr_irq_unsafe);
320
321 seq_printf(m, " hardirq-read-safe locks: %11lu\n",
322 nr_hardirq_read_safe);
323 seq_printf(m, " hardirq-read-unsafe locks: %11lu\n",
324 nr_hardirq_read_unsafe);
325 seq_printf(m, " softirq-read-safe locks: %11lu\n",
326 nr_softirq_read_safe);
327 seq_printf(m, " softirq-read-unsafe locks: %11lu\n",
328 nr_softirq_read_unsafe);
329 seq_printf(m, " irq-read-safe locks: %11lu\n",
330 nr_irq_read_safe);
331 seq_printf(m, " irq-read-unsafe locks: %11lu\n",
332 nr_irq_read_unsafe);
333
334 seq_printf(m, " uncategorized locks: %11lu\n",
335 nr_uncategorized);
336 seq_printf(m, " unused locks: %11lu\n",
337 nr_unused);
338 seq_printf(m, " max locking depth: %11u\n",
339 max_lockdep_depth);
340#ifdef CONFIG_PROVE_LOCKING
341 seq_printf(m, " max bfs queue depth: %11u\n",
342 max_bfs_queue_depth);
343#endif
344 lockdep_stats_debug_show(m);
345 seq_printf(m, " debug_locks: %11u\n",
346 debug_locks);
347
348 return 0;
349}
350
351static int lockdep_stats_open(struct inode *inode, struct file *file)
352{
353 return single_open(file, lockdep_stats_show, NULL);
354}
355
356static const struct file_operations proc_lockdep_stats_operations = {
357 .open = lockdep_stats_open,
358 .read = seq_read,
359 .llseek = seq_lseek,
360 .release = single_release,
361};
362
363#ifdef CONFIG_LOCK_STAT
364
365struct lock_stat_data {
366 struct lock_class *class;
367 struct lock_class_stats stats;
368};
369
370struct lock_stat_seq {
371 struct lock_stat_data *iter_end;
372 struct lock_stat_data stats[MAX_LOCKDEP_KEYS];
373};
374
375/*
376 * sort on absolute number of contentions
377 */
378static int lock_stat_cmp(const void *l, const void *r)
379{
380 const struct lock_stat_data *dl = l, *dr = r;
381 unsigned long nl, nr;
382
383 nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr;
384 nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr;
385
386 return nr - nl;
387}
388
389static void seq_line(struct seq_file *m, char c, int offset, int length)
390{
391 int i;
392
393 for (i = 0; i < offset; i++)
394 seq_puts(m, " ");
395 for (i = 0; i < length; i++)
396 seq_printf(m, "%c", c);
397 seq_puts(m, "\n");
398}
399
400static void snprint_time(char *buf, size_t bufsiz, s64 nr)
401{
402 s64 div;
403 s32 rem;
404
405 nr += 5; /* for display rounding */
406 div = div_s64_rem(nr, 1000, &rem);
407 snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10);
408}
409
410static void seq_time(struct seq_file *m, s64 time)
411{
412 char num[15];
413
414 snprint_time(num, sizeof(num), time);
415 seq_printf(m, " %14s", num);
416}
417
418static void seq_lock_time(struct seq_file *m, struct lock_time *lt)
419{
420 seq_printf(m, "%14lu", lt->nr);
421 seq_time(m, lt->min);
422 seq_time(m, lt->max);
423 seq_time(m, lt->total);
424 seq_time(m, lt->nr ? div_s64(lt->total, lt->nr) : 0);
425}
426
427static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
428{
429 char name[39];
430 struct lock_class *class;
431 struct lock_class_stats *stats;
432 int i, namelen;
433
434 class = data->class;
435 stats = &data->stats;
436
437 namelen = 38;
438 if (class->name_version > 1)
439 namelen -= 2; /* XXX truncates versions > 9 */
440 if (class->subclass)
441 namelen -= 2;
442
443 if (!class->name) {
444 char str[KSYM_NAME_LEN];
445 const char *key_name;
446
447 key_name = __get_key_name(class->key, str);
448 snprintf(name, namelen, "%s", key_name);
449 } else {
450 snprintf(name, namelen, "%s", class->name);
451 }
452 namelen = strlen(name);
453 if (class->name_version > 1) {
454 snprintf(name+namelen, 3, "#%d", class->name_version);
455 namelen += 2;
456 }
457 if (class->subclass) {
458 snprintf(name+namelen, 3, "/%d", class->subclass);
459 namelen += 2;
460 }
461
462 if (stats->write_holdtime.nr) {
463 if (stats->read_holdtime.nr)
464 seq_printf(m, "%38s-W:", name);
465 else
466 seq_printf(m, "%40s:", name);
467
468 seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
469 seq_lock_time(m, &stats->write_waittime);
470 seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]);
471 seq_lock_time(m, &stats->write_holdtime);
472 seq_puts(m, "\n");
473 }
474
475 if (stats->read_holdtime.nr) {
476 seq_printf(m, "%38s-R:", name);
477 seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]);
478 seq_lock_time(m, &stats->read_waittime);
479 seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]);
480 seq_lock_time(m, &stats->read_holdtime);
481 seq_puts(m, "\n");
482 }
483
484 if (stats->read_waittime.nr + stats->write_waittime.nr == 0)
485 return;
486
487 if (stats->read_holdtime.nr)
488 namelen += 2;
489
490 for (i = 0; i < LOCKSTAT_POINTS; i++) {
491 char ip[32];
492
493 if (class->contention_point[i] == 0)
494 break;
495
496 if (!i)
497 seq_line(m, '-', 40-namelen, namelen);
498
499 snprintf(ip, sizeof(ip), "[<%p>]",
500 (void *)class->contention_point[i]);
501 seq_printf(m, "%40s %14lu %29s %pS\n",
502 name, stats->contention_point[i],
503 ip, (void *)class->contention_point[i]);
504 }
505 for (i = 0; i < LOCKSTAT_POINTS; i++) {
506 char ip[32];
507
508 if (class->contending_point[i] == 0)
509 break;
510
511 if (!i)
512 seq_line(m, '-', 40-namelen, namelen);
513
514 snprintf(ip, sizeof(ip), "[<%p>]",
515 (void *)class->contending_point[i]);
516 seq_printf(m, "%40s %14lu %29s %pS\n",
517 name, stats->contending_point[i],
518 ip, (void *)class->contending_point[i]);
519 }
520 if (i) {
521 seq_puts(m, "\n");
522 seq_line(m, '.', 0, 40 + 1 + 12 * (14 + 1));
523 seq_puts(m, "\n");
524 }
525}
526
527static void seq_header(struct seq_file *m)
528{
529 seq_puts(m, "lock_stat version 0.4\n");
530
531 if (unlikely(!debug_locks))
532 seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n");
533
534 seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1));
535 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s "
536 "%14s %14s\n",
537 "class name",
538 "con-bounces",
539 "contentions",
540 "waittime-min",
541 "waittime-max",
542 "waittime-total",
543 "waittime-avg",
544 "acq-bounces",
545 "acquisitions",
546 "holdtime-min",
547 "holdtime-max",
548 "holdtime-total",
549 "holdtime-avg");
550 seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1));
551 seq_printf(m, "\n");
552}
553
554static void *ls_start(struct seq_file *m, loff_t *pos)
555{
556 struct lock_stat_seq *data = m->private;
557 struct lock_stat_data *iter;
558
559 if (*pos == 0)
560 return SEQ_START_TOKEN;
561
562 iter = data->stats + (*pos - 1);
563 if (iter >= data->iter_end)
564 iter = NULL;
565
566 return iter;
567}
568
569static void *ls_next(struct seq_file *m, void *v, loff_t *pos)
570{
571 (*pos)++;
572 return ls_start(m, pos);
573}
574
575static void ls_stop(struct seq_file *m, void *v)
576{
577}
578
579static int ls_show(struct seq_file *m, void *v)
580{
581 if (v == SEQ_START_TOKEN)
582 seq_header(m);
583 else
584 seq_stats(m, v);
585
586 return 0;
587}
588
589static const struct seq_operations lockstat_ops = {
590 .start = ls_start,
591 .next = ls_next,
592 .stop = ls_stop,
593 .show = ls_show,
594};
595
596static int lock_stat_open(struct inode *inode, struct file *file)
597{
598 int res;
599 struct lock_class *class;
600 struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq));
601
602 if (!data)
603 return -ENOMEM;
604
605 res = seq_open(file, &lockstat_ops);
606 if (!res) {
607 struct lock_stat_data *iter = data->stats;
608 struct seq_file *m = file->private_data;
609
610 list_for_each_entry(class, &all_lock_classes, lock_entry) {
611 iter->class = class;
612 iter->stats = lock_stats(class);
613 iter++;
614 }
615 data->iter_end = iter;
616
617 sort(data->stats, data->iter_end - data->stats,
618 sizeof(struct lock_stat_data),
619 lock_stat_cmp, NULL);
620
621 m->private = data;
622 } else
623 vfree(data);
624
625 return res;
626}
627
628static ssize_t lock_stat_write(struct file *file, const char __user *buf,
629 size_t count, loff_t *ppos)
630{
631 struct lock_class *class;
632 char c;
633
634 if (count) {
635 if (get_user(c, buf))
636 return -EFAULT;
637
638 if (c != '0')
639 return count;
640
641 list_for_each_entry(class, &all_lock_classes, lock_entry)
642 clear_lock_stats(class);
643 }
644 return count;
645}
646
647static int lock_stat_release(struct inode *inode, struct file *file)
648{
649 struct seq_file *seq = file->private_data;
650
651 vfree(seq->private);
652 return seq_release(inode, file);
653}
654
655static const struct file_operations proc_lock_stat_operations = {
656 .open = lock_stat_open,
657 .write = lock_stat_write,
658 .read = seq_read,
659 .llseek = seq_lseek,
660 .release = lock_stat_release,
661};
662#endif /* CONFIG_LOCK_STAT */
663
664static int __init lockdep_proc_init(void)
665{
666 proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations);
667#ifdef CONFIG_PROVE_LOCKING
668 proc_create("lockdep_chains", S_IRUSR, NULL,
669 &proc_lockdep_chains_operations);
670#endif
671 proc_create("lockdep_stats", S_IRUSR, NULL,
672 &proc_lockdep_stats_operations);
673
674#ifdef CONFIG_LOCK_STAT
675 proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
676 &proc_lock_stat_operations);
677#endif
678
679 return 0;
680}
681
682__initcall(lockdep_proc_init);
683
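The proc files registered above can be driven from userspace. The sketch below is a hypothetical C utility, assuming a kernel built with CONFIG_LOCK_STAT and root privileges (lock_stat is created with mode S_IRUSR | S_IWUSR); it relies on lock_stat_write() clearing the statistics when the first written character is '0'.

	/* Userspace sketch: reset /proc/lock_stat, then dump it. */
	#include <stdio.h>

	int main(void)
	{
		char line[512];
		FILE *f;

		f = fopen("/proc/lock_stat", "w");	/* clear old statistics */
		if (f) {
			fputs("0", f);
			fclose(f);
		}

		f = fopen("/proc/lock_stat", "r");	/* dump the fresh table */
		if (!f) {
			perror("/proc/lock_stat");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}

In practice a workload would be run between the reset and the dump so that the reported wait and hold times refer to it.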
diff --git a/kernel/locking/lockdep_states.h b/kernel/locking/lockdep_states.h
new file mode 100644
index 000000000000..995b0cc2b84c
--- /dev/null
+++ b/kernel/locking/lockdep_states.h
@@ -0,0 +1,9 @@
1/*
2 * Lockdep states:
3 *
4 * please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever
5 * you add one, or come up with a nice dynamic solution.
6 */
7LOCKDEP_STATE(HARDIRQ)
8LOCKDEP_STATE(SOFTIRQ)
9LOCKDEP_STATE(RECLAIM_FS)
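To make the x-macro pattern concrete, the following illustration shows how the lock_usage_bit enum in lockdep_internals.h (earlier in this series) expands one of the lines above; it is explanatory only and adds no code:

	/*
	 * Illustration: when lockdep_internals.h includes this header inside
	 * enum lock_usage_bit, the HARDIRQ line alone expands to
	 *
	 *	LOCK_USED_IN_HARDIRQ,
	 *	LOCK_USED_IN_HARDIRQ_READ,
	 *	LOCK_ENABLED_HARDIRQ,
	 *	LOCK_ENABLED_HARDIRQ_READ,
	 *
	 * and the second include under __LOCKF() produces the matching
	 * LOCKF_* bitmasks. Adding a state here therefore grows both enums
	 * automatically, which is why XXX_LOCK_USAGE_STATES in
	 * include/linux/lockdep.h has to be kept in sync by hand, as the
	 * comment above asks.
	 */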
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
new file mode 100644
index 000000000000..7e3443fe1f48
--- /dev/null
+++ b/kernel/locking/mutex-debug.c
@@ -0,0 +1,110 @@
1/*
2 * kernel/locking/mutex-debug.c
3 *
4 * Debugging code for mutexes
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 *
10 * lock debugging, locking tree, deadlock detection started by:
11 *
12 * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
13 * Released under the General Public License (GPL).
14 */
15#include <linux/mutex.h>
16#include <linux/delay.h>
17#include <linux/export.h>
18#include <linux/poison.h>
19#include <linux/sched.h>
20#include <linux/spinlock.h>
21#include <linux/kallsyms.h>
22#include <linux/interrupt.h>
23#include <linux/debug_locks.h>
24
25#include "mutex-debug.h"
26
27/*
28 * Must be called with lock->wait_lock held.
29 */
30void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
31{
32 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
33 waiter->magic = waiter;
34 INIT_LIST_HEAD(&waiter->list);
35}
36
37void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
38{
39 SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
40 DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
41 DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
42 DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
43}
44
45void debug_mutex_free_waiter(struct mutex_waiter *waiter)
46{
47 DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
48 memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
49}
50
51void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
52 struct thread_info *ti)
53{
54 SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
55
56 /* Mark the current thread as blocked on the lock: */
57 ti->task->blocked_on = waiter;
58}
59
60void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
61 struct thread_info *ti)
62{
63 DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
64 DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
65 DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
66 ti->task->blocked_on = NULL;
67
68 list_del_init(&waiter->list);
69 waiter->task = NULL;
70}
71
72void debug_mutex_unlock(struct mutex *lock)
73{
74 if (unlikely(!debug_locks))
75 return;
76
77 DEBUG_LOCKS_WARN_ON(lock->magic != lock);
78 DEBUG_LOCKS_WARN_ON(lock->owner != current);
79 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
80 mutex_clear_owner(lock);
81}
82
83void debug_mutex_init(struct mutex *lock, const char *name,
84 struct lock_class_key *key)
85{
86#ifdef CONFIG_DEBUG_LOCK_ALLOC
87 /*
88 * Make sure we are not reinitializing a held lock:
89 */
90 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
91 lockdep_init_map(&lock->dep_map, name, key, 0);
92#endif
93 lock->magic = lock;
94}
95
96/***
97 * mutex_destroy - mark a mutex unusable
98 * @lock: the mutex to be destroyed
99 *
100 * This function marks the mutex uninitialized, and any subsequent
101 * use of the mutex is forbidden. The mutex must not be locked when
102 * this function is called.
103 */
104void mutex_destroy(struct mutex *lock)
105{
106 DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
107 lock->magic = NULL;
108}
109
110EXPORT_SYMBOL_GPL(mutex_destroy);
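A minimal, hypothetical usage sketch of the init/destroy pairing that the hooks above enforce; the struct and functions are invented, and it assumes CONFIG_DEBUG_MUTEXES so that mutex_init() reaches debug_mutex_init() and mutex_destroy() is the checked version defined above.

	/* Hypothetical example: pairing mutex_init() with mutex_destroy(). */
	#include <linux/mutex.h>
	#include <linux/slab.h>

	struct widget {
		struct mutex lock;
		int value;
	};

	static struct widget *widget_create(void)
	{
		struct widget *w = kzalloc(sizeof(*w), GFP_KERNEL);

		if (!w)
			return NULL;
		mutex_init(&w->lock);		/* ends up in debug_mutex_init() */
		return w;
	}

	static void widget_destroy(struct widget *w)
	{
		/* must be unlocked here, or DEBUG_LOCKS_WARN_ON() fires */
		mutex_destroy(&w->lock);	/* marks the mutex unusable */
		kfree(w);
	}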
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
new file mode 100644
index 000000000000..0799fd3e4cfa
--- /dev/null
+++ b/kernel/locking/mutex-debug.h
@@ -0,0 +1,55 @@
1/*
2 * Mutexes: blocking mutual exclusion locks
3 *
4 * started by Ingo Molnar:
5 *
6 * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 *
8 * This file contains mutex debugging related internal declarations,
9 * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case.
10 * More details are in kernel/locking/mutex-debug.c.
11 */
12
13/*
14 * This must be called with lock->wait_lock held.
15 */
16extern void debug_mutex_lock_common(struct mutex *lock,
17 struct mutex_waiter *waiter);
18extern void debug_mutex_wake_waiter(struct mutex *lock,
19 struct mutex_waiter *waiter);
20extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
21extern void debug_mutex_add_waiter(struct mutex *lock,
22 struct mutex_waiter *waiter,
23 struct thread_info *ti);
24extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
25 struct thread_info *ti);
26extern void debug_mutex_unlock(struct mutex *lock);
27extern void debug_mutex_init(struct mutex *lock, const char *name,
28 struct lock_class_key *key);
29
30static inline void mutex_set_owner(struct mutex *lock)
31{
32 lock->owner = current;
33}
34
35static inline void mutex_clear_owner(struct mutex *lock)
36{
37 lock->owner = NULL;
38}
39
40#define spin_lock_mutex(lock, flags) \
41 do { \
42 struct mutex *l = container_of(lock, struct mutex, wait_lock); \
43 \
44 DEBUG_LOCKS_WARN_ON(in_interrupt()); \
45 local_irq_save(flags); \
46 arch_spin_lock(&(lock)->rlock.raw_lock);\
47 DEBUG_LOCKS_WARN_ON(l->magic != l); \
48 } while (0)
49
50#define spin_unlock_mutex(lock, flags) \
51 do { \
52 arch_spin_unlock(&(lock)->rlock.raw_lock); \
53 local_irq_restore(flags); \
54 preempt_check_resched(); \
55 } while (0)
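A short illustrative note on the two macros above; the rationale is inferred from the code rather than stated in it, and the usage pair simply mirrors how mutex.c calls them.

	/*
	 * Note: spin_lock_mutex()/spin_unlock_mutex() bypass the regular
	 * spin_lock()/spin_unlock() wrappers and operate on
	 * lock->rlock.raw_lock directly, with IRQs disabled by hand. That
	 * keeps the mutex-internal wait_lock from recursing into the very
	 * debugging machinery this file supports, while still letting the
	 * macro sanity-check l->magic under the lock. Callers use them in
	 * matched pairs around wait-list manipulation:
	 *
	 *	unsigned long flags;
	 *
	 *	spin_lock_mutex(&lock->wait_lock, flags);
	 *	...
	 *	spin_unlock_mutex(&lock->wait_lock, flags);
	 */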
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
new file mode 100644
index 000000000000..4dd6e4c219de
--- /dev/null
+++ b/kernel/locking/mutex.c
@@ -0,0 +1,960 @@
1/*
2 * kernel/locking/mutex.c
3 *
4 * Mutexes: blocking mutual exclusion locks
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 *
10 * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
11 * David Howells for suggestions and improvements.
12 *
13 * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline
14 * from the -rt tree, where it was originally implemented for rtmutexes
15 * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
16 * and Sven Dietrich.
17 *
18 * Also see Documentation/mutex-design.txt.
19 */
20#include <linux/mutex.h>
21#include <linux/ww_mutex.h>
22#include <linux/sched.h>
23#include <linux/sched/rt.h>
24#include <linux/export.h>
25#include <linux/spinlock.h>
26#include <linux/interrupt.h>
27#include <linux/debug_locks.h>
28
29/*
30 * In the DEBUG case we are using the "NULL fastpath" for mutexes,
31 * which forces all calls into the slowpath:
32 */
33#ifdef CONFIG_DEBUG_MUTEXES
34# include "mutex-debug.h"
35# include <asm-generic/mutex-null.h>
36#else
37# include "mutex.h"
38# include <asm/mutex.h>
39#endif
40
41/*
42 * A negative mutex count indicates that waiters are sleeping waiting for the
43 * mutex.
44 */
45#define MUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) >= 0)
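/*
 * Clarifying illustration (inferred from the comment above and the
 * fastpath/slowpath code below): lock->count encodes three states,
 *
 *	 1  unlocked
 *	 0  locked, no waiters
 *	<0  locked, with waiters sleeping on wait_list
 *
 * so MUTEX_SHOW_NO_WAITER() simply tests "count is not negative".
 * The fastpaths perform the 1->0 (lock) and 0->1 (unlock) transitions;
 * the slowpath xchg()s the count to -1 before sleeping so that the
 * unlock path knows it must wake a waiter.
 */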
46
47void
48__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
49{
50 atomic_set(&lock->count, 1);
51 spin_lock_init(&lock->wait_lock);
52 INIT_LIST_HEAD(&lock->wait_list);
53 mutex_clear_owner(lock);
54#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
55 lock->spin_mlock = NULL;
56#endif
57
58 debug_mutex_init(lock, name, key);
59}
60
61EXPORT_SYMBOL(__mutex_init);
62
63#ifndef CONFIG_DEBUG_LOCK_ALLOC
64/*
65 * We split the mutex lock/unlock logic into separate fastpath and
66 * slowpath functions, to reduce the register pressure on the fastpath.
67 * We also put the fastpath first in the kernel image, to make sure the
68 * branch is predicted by the CPU as default-untaken.
69 */
70static __used noinline void __sched
71__mutex_lock_slowpath(atomic_t *lock_count);
72
73/**
74 * mutex_lock - acquire the mutex
75 * @lock: the mutex to be acquired
76 *
77 * Lock the mutex exclusively for this task. If the mutex is not
78 * available right now, it will sleep until it can get it.
79 *
80 * The mutex must later on be released by the same task that
81 * acquired it. Recursive locking is not allowed. The task
82 * may not exit without first unlocking the mutex. Also, kernel
83 * memory where the mutex resides must not be freed with
84 * the mutex still locked. The mutex must first be initialized
85 * (or statically defined) before it can be locked. memset()-ing
86 * the mutex to 0 is not allowed.
87 *
88 * ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging
89 * checks that will enforce the restrictions and will also do
90 * deadlock debugging. )
91 *
92 * This function is similar to (but not equivalent to) down().
93 */
94void __sched mutex_lock(struct mutex *lock)
95{
96 might_sleep();
97 /*
98 * The locking fastpath is the 1->0 transition from
99 * 'unlocked' into 'locked' state.
100 */
101 __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
102 mutex_set_owner(lock);
103}
104
105EXPORT_SYMBOL(mutex_lock);
106#endif
107
108#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
109/*
110 * To prevent a stampede of mutex spinners all trying to acquire the mutex
111 * more or less simultaneously, the spinners need to acquire an MCS lock
112 * first, before spinning on the owner field.
113 *
114 * We don't inline mspin_lock() so that perf can correctly account for the
115 * time spent in this lock function.
116 */
117struct mspin_node {
118	struct mspin_node *next;
119 int locked; /* 1 if lock acquired */
120};
121#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock))
122
123static noinline
124void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
125{
126 struct mspin_node *prev;
127
128 /* Init node */
129 node->locked = 0;
130 node->next = NULL;
131
132 prev = xchg(lock, node);
133 if (likely(prev == NULL)) {
134 /* Lock acquired */
135 node->locked = 1;
136 return;
137 }
138 ACCESS_ONCE(prev->next) = node;
139 smp_wmb();
140 /* Wait until the lock holder passes the lock down */
141 while (!ACCESS_ONCE(node->locked))
142 arch_mutex_cpu_relax();
143}
144
145static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
146{
147 struct mspin_node *next = ACCESS_ONCE(node->next);
148
149 if (likely(!next)) {
150 /*
151 * Release the lock by setting it to NULL
152 */
153 if (cmpxchg(lock, node, NULL) == node)
154 return;
155 /* Wait until the next pointer is set */
156 while (!(next = ACCESS_ONCE(node->next)))
157 arch_mutex_cpu_relax();
158 }
159 ACCESS_ONCE(next->locked) = 1;
160 smp_wmb();
161}
162
163/*
164 * Mutex spinning code migrated from kernel/sched/core.c
165 */
166
167static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
168{
169 if (lock->owner != owner)
170 return false;
171
172 /*
173	 * Ensure we emit the owner->on_cpu dereference _after_ checking that
174	 * lock->owner still matches owner. If that check fails, owner might
175	 * point to free()d memory; if it still matches, the rcu_read_lock()
176	 * ensures the memory stays valid.
177 */
178 barrier();
179
180 return owner->on_cpu;
181}
182
183/*
184 * Look out! "owner" is an entirely speculative pointer
185 * access and not reliable.
186 */
187static noinline
188int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
189{
190 rcu_read_lock();
191 while (owner_running(lock, owner)) {
192 if (need_resched())
193 break;
194
195 arch_mutex_cpu_relax();
196 }
197 rcu_read_unlock();
198
199 /*
200 * We break out the loop above on need_resched() and when the
201 * owner changed, which is a sign for heavy contention. Return
202 * success only when lock->owner is NULL.
203 */
204 return lock->owner == NULL;
205}
206
207/*
208 * Initial check for entering the mutex spinning loop
209 */
210static inline int mutex_can_spin_on_owner(struct mutex *lock)
211{
212 struct task_struct *owner;
213 int retval = 1;
214
215 rcu_read_lock();
216 owner = ACCESS_ONCE(lock->owner);
217 if (owner)
218 retval = owner->on_cpu;
219 rcu_read_unlock();
220 /*
221	 * If lock->owner is not set, the mutex has either been released, or
222	 * the new owner has just acquired it and not yet set the owner field.
223 */
224 return retval;
225}
226#endif
227
228static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
229
230/**
231 * mutex_unlock - release the mutex
232 * @lock: the mutex to be released
233 *
234 * Unlock a mutex that has been locked by this task previously.
235 *
236 * This function must not be used in interrupt context. Unlocking
237 * a mutex that is not locked is not allowed.
238 *
239 * This function is similar to (but not equivalent to) up().
240 */
241void __sched mutex_unlock(struct mutex *lock)
242{
243 /*
244 * The unlocking fastpath is the 0->1 transition from 'locked'
245 * into 'unlocked' state:
246 */
247#ifndef CONFIG_DEBUG_MUTEXES
248 /*
249 * When debugging is enabled we must not clear the owner before time,
250 * the slow path will always be taken, and that clears the owner field
251 * after verifying that it was indeed current.
252 */
253 mutex_clear_owner(lock);
254#endif
255 __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
256}
257
258EXPORT_SYMBOL(mutex_unlock);
259
260/**
261 * ww_mutex_unlock - release the w/w mutex
262 * @lock: the mutex to be released
263 *
264 * Unlock a mutex that has been locked by this task previously with any of the
265 * ww_mutex_lock* functions (with or without an acquire context). It is
266 * forbidden to release the locks after releasing the acquire context.
267 *
268 * This function must not be used in interrupt context. Unlocking
269 * an unlocked mutex is not allowed.
270 */
271void __sched ww_mutex_unlock(struct ww_mutex *lock)
272{
273 /*
274 * The unlocking fastpath is the 0->1 transition from 'locked'
275 * into 'unlocked' state:
276 */
277 if (lock->ctx) {
278#ifdef CONFIG_DEBUG_MUTEXES
279 DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
280#endif
281 if (lock->ctx->acquired > 0)
282 lock->ctx->acquired--;
283 lock->ctx = NULL;
284 }
285
286#ifndef CONFIG_DEBUG_MUTEXES
287 /*
288 * When debugging is enabled we must not clear the owner before time,
289 * the slow path will always be taken, and that clears the owner field
290 * after verifying that it was indeed current.
291 */
292 mutex_clear_owner(&lock->base);
293#endif
294 __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath);
295}
296EXPORT_SYMBOL(ww_mutex_unlock);
297
298static inline int __sched
299__mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
300{
301 struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
302 struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
303
304 if (!hold_ctx)
305 return 0;
306
307 if (unlikely(ctx == hold_ctx))
308 return -EALREADY;
309
310 if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
311 (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
312#ifdef CONFIG_DEBUG_MUTEXES
313 DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
314 ctx->contending_lock = ww;
315#endif
316 return -EDEADLK;
317 }
318
319 return 0;
320}
321
322static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
323 struct ww_acquire_ctx *ww_ctx)
324{
325#ifdef CONFIG_DEBUG_MUTEXES
326 /*
327 * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
328 * but released with a normal mutex_unlock in this call.
329 *
330 * This should never happen, always use ww_mutex_unlock.
331 */
332 DEBUG_LOCKS_WARN_ON(ww->ctx);
333
334 /*
335 * Not quite done after calling ww_acquire_done() ?
336 */
337 DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
338
339 if (ww_ctx->contending_lock) {
340 /*
341 * After -EDEADLK you tried to
342 * acquire a different ww_mutex? Bad!
343 */
344 DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
345
346 /*
347 * You called ww_mutex_lock after receiving -EDEADLK,
348 * but 'forgot' to unlock everything else first?
349 */
350 DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
351 ww_ctx->contending_lock = NULL;
352 }
353
354 /*
355 * Naughty, using a different class will lead to undefined behavior!
356 */
357 DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
358#endif
359 ww_ctx->acquired++;
360}
361
362/*
363 * After acquiring the lock via the fastpath, or when we lost out in the
364 * contested slowpath, set ctx and wake up any waiters so they can recheck.
365 *
366 * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set,
367 * as the fastpath and opportunistic spinning are disabled in that case.
368 */
369static __always_inline void
370ww_mutex_set_context_fastpath(struct ww_mutex *lock,
371 struct ww_acquire_ctx *ctx)
372{
373 unsigned long flags;
374 struct mutex_waiter *cur;
375
376 ww_mutex_lock_acquired(lock, ctx);
377
378 lock->ctx = ctx;
379
380 /*
381 * The lock->ctx update should be visible on all cores before
382 * the atomic read is done, otherwise contended waiters might be
383	 * missed. A contended waiter will either see ww_ctx == NULL
384	 * and keep spinning, or it will acquire wait_lock, add itself
385	 * to the waiter list and sleep.
386 */
387 smp_mb(); /* ^^^ */
388
389 /*
390 * Check if lock is contended, if not there is nobody to wake up
391 */
392 if (likely(atomic_read(&lock->base.count) == 0))
393 return;
394
395 /*
396 * Uh oh, we raced in fastpath, wake up everyone in this case,
397 * so they can see the new lock->ctx.
398 */
399 spin_lock_mutex(&lock->base.wait_lock, flags);
400 list_for_each_entry(cur, &lock->base.wait_list, list) {
401 debug_mutex_wake_waiter(&lock->base, cur);
402 wake_up_process(cur->task);
403 }
404 spin_unlock_mutex(&lock->base.wait_lock, flags);
405}
406
407/*
408 * Lock a mutex (possibly interruptible), slowpath:
409 */
410static __always_inline int __sched
411__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
412 struct lockdep_map *nest_lock, unsigned long ip,
413 struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
414{
415 struct task_struct *task = current;
416 struct mutex_waiter waiter;
417 unsigned long flags;
418 int ret;
419
420 preempt_disable();
421 mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
422
423#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
424 /*
425 * Optimistic spinning.
426 *
427 * We try to spin for acquisition when we find that there are no
428 * pending waiters and the lock owner is currently running on a
429 * (different) CPU.
430 *
431 * The rationale is that if the lock owner is running, it is likely to
432 * release the lock soon.
433 *
434 * Since this needs the lock owner, and this mutex implementation
435 * doesn't track the owner atomically in the lock field, we need to
436 * track it non-atomically.
437 *
438 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
439 * to serialize everything.
440 *
441	 * The mutex spinners are queued up using an MCS lock so that only one
442 * spinner can compete for the mutex. However, if mutex spinning isn't
443 * going to happen, there is no point in going through the lock/unlock
444 * overhead.
445 */
446 if (!mutex_can_spin_on_owner(lock))
447 goto slowpath;
448
449 for (;;) {
450 struct task_struct *owner;
451 struct mspin_node node;
452
453 if (use_ww_ctx && ww_ctx->acquired > 0) {
454 struct ww_mutex *ww;
455
456 ww = container_of(lock, struct ww_mutex, base);
457 /*
458			 * If ww->ctx is set, its contents are undefined; only
459			 * holding wait_lock guarantees that they are valid
460			 * to read.
461			 *
462			 * As such, when deadlock detection needs to be
463			 * performed, optimistic spinning cannot be used.
464 */
465 if (ACCESS_ONCE(ww->ctx))
466 goto slowpath;
467 }
468
469 /*
470 * If there's an owner, wait for it to either
471 * release the lock or go to sleep.
472 */
473 mspin_lock(MLOCK(lock), &node);
474 owner = ACCESS_ONCE(lock->owner);
475 if (owner && !mutex_spin_on_owner(lock, owner)) {
476 mspin_unlock(MLOCK(lock), &node);
477 goto slowpath;
478 }
479
480 if ((atomic_read(&lock->count) == 1) &&
481 (atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
482 lock_acquired(&lock->dep_map, ip);
483 if (use_ww_ctx) {
484 struct ww_mutex *ww;
485 ww = container_of(lock, struct ww_mutex, base);
486
487 ww_mutex_set_context_fastpath(ww, ww_ctx);
488 }
489
490 mutex_set_owner(lock);
491 mspin_unlock(MLOCK(lock), &node);
492 preempt_enable();
493 return 0;
494 }
495 mspin_unlock(MLOCK(lock), &node);
496
497 /*
498		 * When there's no owner, we might have preempted the owner
499		 * between it acquiring the lock and setting the owner field. If
500		 * we're an RT task, that will live-lock because we won't let
501		 * the owner complete.
502 */
503 if (!owner && (need_resched() || rt_task(task)))
504 goto slowpath;
505
506 /*
507 * The cpu_relax() call is a compiler barrier which forces
508 * everything in this loop to be re-loaded. We don't need
509 * memory barriers as we'll eventually observe the right
510 * values at the cost of a few extra spins.
511 */
512 arch_mutex_cpu_relax();
513 }
514slowpath:
515#endif
516 spin_lock_mutex(&lock->wait_lock, flags);
517
518 /* once more, can we acquire the lock? */
519 if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, 0) == 1))
520 goto skip_wait;
521
522 debug_mutex_lock_common(lock, &waiter);
523 debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
524
525 /* add waiting tasks to the end of the waitqueue (FIFO): */
526 list_add_tail(&waiter.list, &lock->wait_list);
527 waiter.task = task;
528
529 lock_contended(&lock->dep_map, ip);
530
531 for (;;) {
532 /*
533		 * Let's try to take the lock again - this is needed even if
534 * we get here for the first time (shortly after failing to
535 * acquire the lock), to make sure that we get a wakeup once
536 * it's unlocked. Later on, if we sleep, this is the
537 * operation that gives us the lock. We xchg it to -1, so
538 * that when we release the lock, we properly wake up the
539 * other waiters:
540 */
541 if (MUTEX_SHOW_NO_WAITER(lock) &&
542 (atomic_xchg(&lock->count, -1) == 1))
543 break;
544
545 /*
546 * got a signal? (This code gets eliminated in the
547 * TASK_UNINTERRUPTIBLE case.)
548 */
549 if (unlikely(signal_pending_state(state, task))) {
550 ret = -EINTR;
551 goto err;
552 }
553
554 if (use_ww_ctx && ww_ctx->acquired > 0) {
555 ret = __mutex_lock_check_stamp(lock, ww_ctx);
556 if (ret)
557 goto err;
558 }
559
560 __set_task_state(task, state);
561
562 /* didn't get the lock, go to sleep: */
563 spin_unlock_mutex(&lock->wait_lock, flags);
564 schedule_preempt_disabled();
565 spin_lock_mutex(&lock->wait_lock, flags);
566 }
567 mutex_remove_waiter(lock, &waiter, current_thread_info());
568 /* set it to 0 if there are no waiters left: */
569 if (likely(list_empty(&lock->wait_list)))
570 atomic_set(&lock->count, 0);
571 debug_mutex_free_waiter(&waiter);
572
573skip_wait:
574 /* got the lock - cleanup and rejoice! */
575 lock_acquired(&lock->dep_map, ip);
576 mutex_set_owner(lock);
577
578 if (use_ww_ctx) {
579 struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
580 struct mutex_waiter *cur;
581
582 /*
583 * This branch gets optimized out for the common case,
584 * and is only important for ww_mutex_lock.
585 */
586 ww_mutex_lock_acquired(ww, ww_ctx);
587 ww->ctx = ww_ctx;
588
589 /*
590 * Give any possible sleeping processes the chance to wake up,
591 * so they can recheck if they have to back off.
592 */
593 list_for_each_entry(cur, &lock->wait_list, list) {
594 debug_mutex_wake_waiter(lock, cur);
595 wake_up_process(cur->task);
596 }
597 }
598
599 spin_unlock_mutex(&lock->wait_lock, flags);
600 preempt_enable();
601 return 0;
602
603err:
604 mutex_remove_waiter(lock, &waiter, task_thread_info(task));
605 spin_unlock_mutex(&lock->wait_lock, flags);
606 debug_mutex_free_waiter(&waiter);
607 mutex_release(&lock->dep_map, 1, ip);
608 preempt_enable();
609 return ret;
610}
611
612#ifdef CONFIG_DEBUG_LOCK_ALLOC
613void __sched
614mutex_lock_nested(struct mutex *lock, unsigned int subclass)
615{
616 might_sleep();
617 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
618 subclass, NULL, _RET_IP_, NULL, 0);
619}
620
621EXPORT_SYMBOL_GPL(mutex_lock_nested);
622
623void __sched
624_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
625{
626 might_sleep();
627 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
628 0, nest, _RET_IP_, NULL, 0);
629}
630
631EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
632
633int __sched
634mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
635{
636 might_sleep();
637 return __mutex_lock_common(lock, TASK_KILLABLE,
638 subclass, NULL, _RET_IP_, NULL, 0);
639}
640EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
641
642int __sched
643mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
644{
645 might_sleep();
646 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
647 subclass, NULL, _RET_IP_, NULL, 0);
648}
649
650EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
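
/*
 * Illustrative sketch, not part of this file: nesting two locks that
 * share one lock class.  "struct foo" is made up; without the subclass
 * annotation lockdep would flag the second lock as a recursive (AA)
 * deadlock.  A real caller must still impose a consistent locking
 * order (e.g. by address) to avoid an actual ABBA deadlock.
 */
struct foo {
	struct mutex lock;
	/* ... */
};

static void foo_lock_pair(struct foo *a, struct foo *b)
{
	mutex_lock(&a->lock);					/* subclass 0 */
	mutex_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);	/* subclass 1 */
}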
651
652static inline int
653ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
654{
655#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
656 unsigned tmp;
657
658 if (ctx->deadlock_inject_countdown-- == 0) {
659 tmp = ctx->deadlock_inject_interval;
660 if (tmp > UINT_MAX/4)
661 tmp = UINT_MAX;
662 else
663 tmp = tmp*2 + tmp + tmp/2;
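			/*
			 * i.e. roughly 3.5x growth each time an injection
			 * fires; starting from 1 the interval goes
			 * 1 -> 3 -> 10 -> 35 -> 122 -> ... (integer math).
			 */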
664
665 ctx->deadlock_inject_interval = tmp;
666 ctx->deadlock_inject_countdown = tmp;
667 ctx->contending_lock = lock;
668
669 ww_mutex_unlock(lock);
670
671 return -EDEADLK;
672 }
673#endif
674
675 return 0;
676}
677
678int __sched
679__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
680{
681 int ret;
682
683 might_sleep();
684 ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE,
685 0, &ctx->dep_map, _RET_IP_, ctx, 1);
686 if (!ret && ctx->acquired > 1)
687 return ww_mutex_deadlock_injection(lock, ctx);
688
689 return ret;
690}
691EXPORT_SYMBOL_GPL(__ww_mutex_lock);
692
693int __sched
694__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
695{
696 int ret;
697
698 might_sleep();
699 ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE,
700 0, &ctx->dep_map, _RET_IP_, ctx, 1);
701
702 if (!ret && ctx->acquired > 1)
703 return ww_mutex_deadlock_injection(lock, ctx);
704
705 return ret;
706}
707EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
708
709#endif
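
/*
 * Illustrative sketch, not part of this file, of the wait/wound
 * back-off pattern that the deadlock injection above is meant to
 * exercise.  The class name and helper are made up; the ww_mutex_* and
 * ww_acquire_* calls are the API from <linux/ww_mutex.h>.  Error
 * handling is trimmed down to the -EDEADLK case.
 */
static DEFINE_WW_CLASS(example_ww_class);

static void example_lock_two(struct ww_mutex *a, struct ww_mutex *b)
{
	struct ww_acquire_ctx ctx;

	ww_acquire_init(&ctx, &example_ww_class);

	ww_mutex_lock(a, &ctx);		/* first lock in a ctx cannot hit -EDEADLK */
	while (ww_mutex_lock(b, &ctx) == -EDEADLK) {
		/* an older context owns @b: back off, sleep-wait on @b, retry */
		ww_mutex_unlock(a);
		ww_mutex_lock_slow(b, &ctx);
		swap(a, b);
	}
	ww_acquire_done(&ctx);

	/* ... work under both locks ... */

	ww_mutex_unlock(a);
	ww_mutex_unlock(b);
	ww_acquire_fini(&ctx);
}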
710
711/*
712 * Release the lock, slowpath:
713 */
714static inline void
715__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
716{
717 struct mutex *lock = container_of(lock_count, struct mutex, count);
718 unsigned long flags;
719
720 spin_lock_mutex(&lock->wait_lock, flags);
721 mutex_release(&lock->dep_map, nested, _RET_IP_);
722 debug_mutex_unlock(lock);
723
724 /*
725	 * Some architectures leave the lock unlocked in the fastpath failure
726	 * case, others need to leave it locked. In the latter case we have to
727	 * unlock it here.
728 */
729 if (__mutex_slowpath_needs_to_unlock())
730 atomic_set(&lock->count, 1);
731
732 if (!list_empty(&lock->wait_list)) {
733 /* get the first entry from the wait-list: */
734 struct mutex_waiter *waiter =
735 list_entry(lock->wait_list.next,
736 struct mutex_waiter, list);
737
738 debug_mutex_wake_waiter(lock, waiter);
739
740 wake_up_process(waiter->task);
741 }
742
743 spin_unlock_mutex(&lock->wait_lock, flags);
744}
745
746/*
747 * Release the lock, slowpath:
748 */
749static __used noinline void
750__mutex_unlock_slowpath(atomic_t *lock_count)
751{
752 __mutex_unlock_common_slowpath(lock_count, 1);
753}
754
755#ifndef CONFIG_DEBUG_LOCK_ALLOC
756/*
757 * Here come the less common (and hence less performance-critical) APIs:
758 * mutex_lock_interruptible() and mutex_trylock().
759 */
760static noinline int __sched
761__mutex_lock_killable_slowpath(struct mutex *lock);
762
763static noinline int __sched
764__mutex_lock_interruptible_slowpath(struct mutex *lock);
765
766/**
767 * mutex_lock_interruptible - acquire the mutex, interruptible
768 * @lock: the mutex to be acquired
769 *
770 * Lock the mutex like mutex_lock(): sleep until the mutex becomes
771 * available and return 0 once it has been acquired. If a signal
772 * arrives while waiting for the lock, this function returns
773 * -EINTR.
774 *
775 * This function is similar to (but not equivalent to) down_interruptible().
776 */
777int __sched mutex_lock_interruptible(struct mutex *lock)
778{
779 int ret;
780
781 might_sleep();
782 ret = __mutex_fastpath_lock_retval(&lock->count);
783 if (likely(!ret)) {
784 mutex_set_owner(lock);
785 return 0;
786 } else
787 return __mutex_lock_interruptible_slowpath(lock);
788}
789
790EXPORT_SYMBOL(mutex_lock_interruptible);
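
/*
 * Illustrative sketch, not part of this file: callers typically just
 * propagate the error.  "struct my_dev" and its members are made up.
 * mutex_lock_killable() below is used the same way, except that only
 * fatal signals interrupt the sleep.
 */
static int my_dev_set_config(struct my_dev *dev, int val)
{
	int err = mutex_lock_interruptible(&dev->lock);

	if (err)
		return err;	/* -EINTR: a signal arrived while sleeping */

	dev->config = val;
	mutex_unlock(&dev->lock);
	return 0;
}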
791
792int __sched mutex_lock_killable(struct mutex *lock)
793{
794 int ret;
795
796 might_sleep();
797 ret = __mutex_fastpath_lock_retval(&lock->count);
798 if (likely(!ret)) {
799 mutex_set_owner(lock);
800 return 0;
801 } else
802 return __mutex_lock_killable_slowpath(lock);
803}
804EXPORT_SYMBOL(mutex_lock_killable);
805
806static __used noinline void __sched
807__mutex_lock_slowpath(atomic_t *lock_count)
808{
809 struct mutex *lock = container_of(lock_count, struct mutex, count);
810
811 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0,
812 NULL, _RET_IP_, NULL, 0);
813}
814
815static noinline int __sched
816__mutex_lock_killable_slowpath(struct mutex *lock)
817{
818 return __mutex_lock_common(lock, TASK_KILLABLE, 0,
819 NULL, _RET_IP_, NULL, 0);
820}
821
822static noinline int __sched
823__mutex_lock_interruptible_slowpath(struct mutex *lock)
824{
825 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0,
826 NULL, _RET_IP_, NULL, 0);
827}
828
829static noinline int __sched
830__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
831{
832 return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0,
833 NULL, _RET_IP_, ctx, 1);
834}
835
836static noinline int __sched
837__ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
838 struct ww_acquire_ctx *ctx)
839{
840 return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0,
841 NULL, _RET_IP_, ctx, 1);
842}
843
844#endif
845
846/*
847 * Spinlock based trylock, we take the spinlock and check whether we
848 * can get the lock:
849 */
850static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
851{
852 struct mutex *lock = container_of(lock_count, struct mutex, count);
853 unsigned long flags;
854 int prev;
855
856 spin_lock_mutex(&lock->wait_lock, flags);
857
858 prev = atomic_xchg(&lock->count, -1);
859 if (likely(prev == 1)) {
860 mutex_set_owner(lock);
861 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
862 }
863
864 /* Set it back to 0 if there are no waiters: */
865 if (likely(list_empty(&lock->wait_list)))
866 atomic_set(&lock->count, 0);
867
868 spin_unlock_mutex(&lock->wait_lock, flags);
869
870 return prev == 1;
871}
872
873/**
874 * mutex_trylock - try to acquire the mutex, without waiting
875 * @lock: the mutex to be acquired
876 *
877 * Try to acquire the mutex atomically. Returns 1 if the mutex
878 * has been acquired successfully, and 0 on contention.
879 *
880 * NOTE: this function follows the spin_trylock() convention, so
881 * it is negated from the down_trylock() return values! Be careful
882 * about this when converting semaphore users to mutexes.
883 *
884 * This function must not be used in interrupt context. The
885 * mutex must be released by the same task that acquired it.
886 */
887int __sched mutex_trylock(struct mutex *lock)
888{
889 int ret;
890
891 ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
892 if (ret)
893 mutex_set_owner(lock);
894
895 return ret;
896}
897EXPORT_SYMBOL(mutex_trylock);
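
/*
 * Illustrative sketch, not part of this file: note the spin_trylock()
 * convention - nonzero means the lock was taken.  "struct my_dev",
 * its stats_lock and update_stats() are made up.
 */
static void maybe_update_stats(struct my_dev *dev)
{
	if (!mutex_trylock(&dev->stats_lock))
		return;			/* contended: skip this round */

	update_stats(dev);
	mutex_unlock(&dev->stats_lock);
}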
898
899#ifndef CONFIG_DEBUG_LOCK_ALLOC
900int __sched
901__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
902{
903 int ret;
904
905 might_sleep();
906
907 ret = __mutex_fastpath_lock_retval(&lock->base.count);
908
909 if (likely(!ret)) {
910 ww_mutex_set_context_fastpath(lock, ctx);
911 mutex_set_owner(&lock->base);
912 } else
913 ret = __ww_mutex_lock_slowpath(lock, ctx);
914 return ret;
915}
916EXPORT_SYMBOL(__ww_mutex_lock);
917
918int __sched
919__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
920{
921 int ret;
922
923 might_sleep();
924
925 ret = __mutex_fastpath_lock_retval(&lock->base.count);
926
927 if (likely(!ret)) {
928 ww_mutex_set_context_fastpath(lock, ctx);
929 mutex_set_owner(&lock->base);
930 } else
931 ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx);
932 return ret;
933}
934EXPORT_SYMBOL(__ww_mutex_lock_interruptible);
935
936#endif
937
938/**
939 * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
940 * @cnt: the atomic which we are to dec
941 * @lock: the mutex to return holding if we dec to 0
942 *
943 * return true and hold lock if we dec to 0, return false otherwise
944 */
945int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
946{
947 /* dec if we can't possibly hit 0 */
948 if (atomic_add_unless(cnt, -1, 1))
949 return 0;
950 /* we might hit 0, so take the lock */
951 mutex_lock(lock);
952 if (!atomic_dec_and_test(cnt)) {
953 /* when we actually did the dec, we didn't hit 0 */
954 mutex_unlock(lock);
955 return 0;
956 }
957 /* we hit 0, and we hold the lock */
958 return 1;
959}
960EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
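
/*
 * Illustrative sketch, not part of this file: the usual "drop the last
 * reference and unlink under a lock" pattern.  "struct my_obj",
 * obj_list_lock and obj_destroy() are made up.
 */
static void obj_put(struct my_obj *obj)
{
	if (!atomic_dec_and_mutex_lock(&obj->refcnt, &obj_list_lock))
		return;			/* others still hold references */

	/* the count hit zero and we hold obj_list_lock */
	list_del(&obj->node);
	mutex_unlock(&obj_list_lock);
	obj_destroy(obj);
}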
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
new file mode 100644
index 000000000000..4115fbf83b12
--- /dev/null
+++ b/kernel/locking/mutex.h
@@ -0,0 +1,48 @@
1/*
2 * Mutexes: blocking mutual exclusion locks
3 *
4 * started by Ingo Molnar:
5 *
6 * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 *
8 * This file contains mutex debugging related internal prototypes, for the
9 * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
10 */
11
12#define spin_lock_mutex(lock, flags) \
13 do { spin_lock(lock); (void)(flags); } while (0)
14#define spin_unlock_mutex(lock, flags) \
15 do { spin_unlock(lock); (void)(flags); } while (0)
16#define mutex_remove_waiter(lock, waiter, ti) \
17 __list_del((waiter)->list.prev, (waiter)->list.next)
18
19#ifdef CONFIG_SMP
20static inline void mutex_set_owner(struct mutex *lock)
21{
22 lock->owner = current;
23}
24
25static inline void mutex_clear_owner(struct mutex *lock)
26{
27 lock->owner = NULL;
28}
29#else
30static inline void mutex_set_owner(struct mutex *lock)
31{
32}
33
34static inline void mutex_clear_owner(struct mutex *lock)
35{
36}
37#endif
38
39#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
40#define debug_mutex_free_waiter(waiter) do { } while (0)
41#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
42#define debug_mutex_unlock(lock) do { } while (0)
43#define debug_mutex_init(lock, name, key) do { } while (0)
44
45static inline void
46debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
47{
48}
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
new file mode 100644
index 000000000000..652a8ee8efe9
--- /dev/null
+++ b/kernel/locking/percpu-rwsem.c
@@ -0,0 +1,165 @@
1#include <linux/atomic.h>
2#include <linux/rwsem.h>
3#include <linux/percpu.h>
4#include <linux/wait.h>
5#include <linux/lockdep.h>
6#include <linux/percpu-rwsem.h>
7#include <linux/rcupdate.h>
8#include <linux/sched.h>
9#include <linux/errno.h>
10
11int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
12 const char *name, struct lock_class_key *rwsem_key)
13{
14 brw->fast_read_ctr = alloc_percpu(int);
15 if (unlikely(!brw->fast_read_ctr))
16 return -ENOMEM;
17
18 /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
19 __init_rwsem(&brw->rw_sem, name, rwsem_key);
20 atomic_set(&brw->write_ctr, 0);
21 atomic_set(&brw->slow_read_ctr, 0);
22 init_waitqueue_head(&brw->write_waitq);
23 return 0;
24}
25
26void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
27{
28 free_percpu(brw->fast_read_ctr);
29 brw->fast_read_ctr = NULL; /* catch use after free bugs */
30}
31
32/*
33 * This is the fast path for down_read/up_read; it only needs to ensure
34 * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
35 * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
36 * serialize with the preempt-disabled section below.
37 *
38 * The nontrivial part is that we should guarantee acquire/release semantics
39 * in the cases when
40 *
41 * R_W: down_write() comes after up_read(), the writer should see all
42 * changes done by the reader
43 * or
44 * W_R: down_read() comes after up_write(), the reader should see all
45 * changes done by the writer
46 *
47 * If this helper fails the callers rely on the normal rw_semaphore and
48 * atomic_dec_and_test(), so in this case we have the necessary barriers.
49 *
50 * But if it succeeds we do not have any barriers; atomic_read(write_ctr) or
51 * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
52 * reader inside the critical section. See the comments in down_write and
53 * up_write below.
54 */
55static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
56{
57 bool success = false;
58
59 preempt_disable();
60 if (likely(!atomic_read(&brw->write_ctr))) {
61 __this_cpu_add(*brw->fast_read_ctr, val);
62 success = true;
63 }
64 preempt_enable();
65
66 return success;
67}
68
69/*
70 * Like the normal down_read() this is not recursive, the writer can
71 * come after the first percpu_down_read() and create the deadlock.
72 *
73 * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
74 * percpu_up_read() does rwsem_release(). This pairs with the usage
75 * of ->rw_sem in percpu_down/up_write().
76 */
77void percpu_down_read(struct percpu_rw_semaphore *brw)
78{
79 might_sleep();
80 if (likely(update_fast_ctr(brw, +1))) {
81 rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
82 return;
83 }
84
85 down_read(&brw->rw_sem);
86 atomic_inc(&brw->slow_read_ctr);
87 /* avoid up_read()->rwsem_release() */
88 __up_read(&brw->rw_sem);
89}
90
91void percpu_up_read(struct percpu_rw_semaphore *brw)
92{
93 rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
94
95 if (likely(update_fast_ctr(brw, -1)))
96 return;
97
98 /* false-positive is possible but harmless */
99 if (atomic_dec_and_test(&brw->slow_read_ctr))
100 wake_up_all(&brw->write_waitq);
101}
102
103static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
104{
105 unsigned int sum = 0;
106 int cpu;
107
108 for_each_possible_cpu(cpu) {
109 sum += per_cpu(*brw->fast_read_ctr, cpu);
110 per_cpu(*brw->fast_read_ctr, cpu) = 0;
111 }
112
113 return sum;
114}
115
116/*
117 * A writer increments ->write_ctr to force the readers to switch to the
118 * slow mode; note the atomic_read() check in update_fast_ctr().
119 *
120 * After that the readers can only inc/dec the slow ->slow_read_ctr counter;
121 * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
122 * counter it represents the number of active readers.
123 *
124 * Finally the writer takes ->rw_sem for writing and blocks the new readers,
125 * then waits until the slow counter becomes zero.
126 */
127void percpu_down_write(struct percpu_rw_semaphore *brw)
128{
129 /* tell update_fast_ctr() there is a pending writer */
130 atomic_inc(&brw->write_ctr);
131 /*
132 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
133 * so that update_fast_ctr() can't succeed.
134 *
135 * 2. Ensures we see the result of every previous this_cpu_add() in
136 * update_fast_ctr().
137 *
138 * 3. Ensures that if any reader has exited its critical section via
139 * fast-path, it executes a full memory barrier before we return.
140 * See R_W case in the comment above update_fast_ctr().
141 */
142 synchronize_sched_expedited();
143
144 /* exclude other writers, and block the new readers completely */
145 down_write(&brw->rw_sem);
146
147 /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
148 atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
149
150 /* wait for all readers to complete their percpu_up_read() */
151 wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
152}
153
154void percpu_up_write(struct percpu_rw_semaphore *brw)
155{
156 /* release the lock, but the readers can't use the fast-path */
157 up_write(&brw->rw_sem);
158 /*
159 * Insert the barrier before the next fast-path in down_read,
160 * see W_R case in the comment above update_fast_ctr().
161 */
162 synchronize_sched_expedited();
163 /* the last writer unblocks update_fast_ctr() */
164 atomic_dec(&brw->write_ctr);
165}
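
/*
 * Illustrative sketch, not part of this file: many cheap readers, a
 * rare writer.  The registry list, its entry type and helpers are made
 * up; percpu_init_rwsem() is the init wrapper from
 * <linux/percpu-rwsem.h>.
 */
struct registry_entry {
	struct list_head node;
	/* ... */
};

static struct percpu_rw_semaphore registry_sem;
static LIST_HEAD(registry);

static int __init registry_init(void)
{
	return percpu_init_rwsem(&registry_sem);
}

static void registry_walk(void (*fn)(struct registry_entry *))
{
	struct registry_entry *e;

	percpu_down_read(&registry_sem);	/* fast path: per-cpu counter */
	list_for_each_entry(e, &registry, node)
		fn(e);
	percpu_up_read(&registry_sem);
}

static void registry_add(struct registry_entry *e)
{
	percpu_down_write(&registry_sem);	/* rare; waits for all readers */
	list_add_tail(&e->node, &registry);
	percpu_up_write(&registry_sem);
}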
diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
new file mode 100644
index 000000000000..13b243a323fa
--- /dev/null
+++ b/kernel/locking/rtmutex-debug.c
@@ -0,0 +1,187 @@
1/*
2 * RT-Mutexes: blocking mutual exclusion locks with PI support
3 *
4 * started by Ingo Molnar and Thomas Gleixner:
5 *
6 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8 *
9 * This code is based on the rt.c implementation in the preempt-rt tree.
10 * Portions of said code are
11 *
12 * Copyright (C) 2004 LynuxWorks, Inc., Igor Manyilov, Bill Huey
13 * Copyright (C) 2006 Esben Nielsen
14 * Copyright (C) 2006 Kihon Technologies Inc.,
15 * Steven Rostedt <rostedt@goodmis.org>
16 *
17 * See rt.c in preempt-rt for proper credits and further information
18 */
19#include <linux/sched.h>
20#include <linux/sched/rt.h>
21#include <linux/delay.h>
22#include <linux/export.h>
23#include <linux/spinlock.h>
24#include <linux/kallsyms.h>
25#include <linux/syscalls.h>
26#include <linux/interrupt.h>
27#include <linux/plist.h>
28#include <linux/fs.h>
29#include <linux/debug_locks.h>
30
31#include "rtmutex_common.h"
32
33static void printk_task(struct task_struct *p)
34{
35 if (p)
36 printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio);
37 else
38 printk("<none>");
39}
40
41static void printk_lock(struct rt_mutex *lock, int print_owner)
42{
43 if (lock->name)
44 printk(" [%p] {%s}\n",
45 lock, lock->name);
46 else
47 printk(" [%p] {%s:%d}\n",
48 lock, lock->file, lock->line);
49
50 if (print_owner && rt_mutex_owner(lock)) {
51 printk(".. ->owner: %p\n", lock->owner);
52 printk(".. held by: ");
53 printk_task(rt_mutex_owner(lock));
54 printk("\n");
55 }
56}
57
58void rt_mutex_debug_task_free(struct task_struct *task)
59{
60 DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters));
61 DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
62}
63
64/*
65 * We fill out the fields in the waiter to store the information about
66 * the deadlock. We print when we return. act_waiter can be NULL in
67 * case of a remove waiter operation.
68 */
69void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter,
70 struct rt_mutex *lock)
71{
72 struct task_struct *task;
73
74 if (!debug_locks || detect || !act_waiter)
75 return;
76
77 task = rt_mutex_owner(act_waiter->lock);
78 if (task && task != current) {
79 act_waiter->deadlock_task_pid = get_pid(task_pid(task));
80 act_waiter->deadlock_lock = lock;
81 }
82}
83
84void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
85{
86 struct task_struct *task;
87
88 if (!waiter->deadlock_lock || !debug_locks)
89 return;
90
91 rcu_read_lock();
92 task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID);
93 if (!task) {
94 rcu_read_unlock();
95 return;
96 }
97
98 if (!debug_locks_off()) {
99 rcu_read_unlock();
100 return;
101 }
102
103 printk("\n============================================\n");
104 printk( "[ BUG: circular locking deadlock detected! ]\n");
105 printk("%s\n", print_tainted());
106 printk( "--------------------------------------------\n");
107 printk("%s/%d is deadlocking current task %s/%d\n\n",
108 task->comm, task_pid_nr(task),
109 current->comm, task_pid_nr(current));
110
111 printk("\n1) %s/%d is trying to acquire this lock:\n",
112 current->comm, task_pid_nr(current));
113 printk_lock(waiter->lock, 1);
114
115 printk("\n2) %s/%d is blocked on this lock:\n",
116 task->comm, task_pid_nr(task));
117 printk_lock(waiter->deadlock_lock, 1);
118
119 debug_show_held_locks(current);
120 debug_show_held_locks(task);
121
122 printk("\n%s/%d's [blocked] stackdump:\n\n",
123 task->comm, task_pid_nr(task));
124 show_stack(task, NULL);
125 printk("\n%s/%d's [current] stackdump:\n\n",
126 current->comm, task_pid_nr(current));
127 dump_stack();
128 debug_show_all_locks();
129 rcu_read_unlock();
130
131 printk("[ turning off deadlock detection."
132 "Please report this trace. ]\n\n");
133}
134
135void debug_rt_mutex_lock(struct rt_mutex *lock)
136{
137}
138
139void debug_rt_mutex_unlock(struct rt_mutex *lock)
140{
141 DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current);
142}
143
144void
145debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
146{
147}
148
149void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
150{
151 DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock));
152}
153
154void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
155{
156 memset(waiter, 0x11, sizeof(*waiter));
157 plist_node_init(&waiter->list_entry, MAX_PRIO);
158 plist_node_init(&waiter->pi_list_entry, MAX_PRIO);
159 waiter->deadlock_task_pid = NULL;
160}
161
162void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
163{
164 put_pid(waiter->deadlock_task_pid);
165 DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry));
166 DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
167 memset(waiter, 0x22, sizeof(*waiter));
168}
169
170void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
171{
172 /*
173 * Make sure we are not reinitializing a held lock:
174 */
175 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
176 lock->name = name;
177}
178
179void
180rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
181{
182}
183
184void rt_mutex_deadlock_account_unlock(struct task_struct *task)
185{
186}
187
diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h
new file mode 100644
index 000000000000..14193d596d78
--- /dev/null
+++ b/kernel/locking/rtmutex-debug.h
@@ -0,0 +1,33 @@
1/*
2 * RT-Mutexes: blocking mutual exclusion locks with PI support
3 *
4 * started by Ingo Molnar and Thomas Gleixner:
5 *
6 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8 *
9 * This file contains macros used solely by rtmutex.c. Debug version.
10 */
11
12extern void
13rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
14extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
15extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
16extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
17extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
18extern void debug_rt_mutex_lock(struct rt_mutex *lock);
19extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
20extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
21 struct task_struct *powner);
22extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
23extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter,
24 struct rt_mutex *lock);
25extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter);
26# define debug_rt_mutex_reset_waiter(w) \
27 do { (w)->deadlock_lock = NULL; } while (0)
28
29static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
30 int detect)
31{
32 return (waiter != NULL);
33}
diff --git a/kernel/locking/rtmutex-tester.c b/kernel/locking/rtmutex-tester.c
new file mode 100644
index 000000000000..1d96dd0d93c1
--- /dev/null
+++ b/kernel/locking/rtmutex-tester.c
@@ -0,0 +1,420 @@
1/*
2 * RT-Mutex-tester: scriptable tester for rt mutexes
3 *
4 * started by Thomas Gleixner:
5 *
6 * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
7 *
8 */
9#include <linux/device.h>
10#include <linux/kthread.h>
11#include <linux/export.h>
12#include <linux/sched.h>
13#include <linux/sched/rt.h>
14#include <linux/spinlock.h>
15#include <linux/timer.h>
16#include <linux/freezer.h>
17#include <linux/stat.h>
18
19#include "rtmutex.h"
20
21#define MAX_RT_TEST_THREADS 8
22#define MAX_RT_TEST_MUTEXES 8
23
24static spinlock_t rttest_lock;
25static atomic_t rttest_event;
26
27struct test_thread_data {
28 int opcode;
29 int opdata;
30 int mutexes[MAX_RT_TEST_MUTEXES];
31 int event;
32 struct device dev;
33};
34
35static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
36static struct task_struct *threads[MAX_RT_TEST_THREADS];
37static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
38
39enum test_opcodes {
40 RTTEST_NOP = 0,
41 RTTEST_SCHEDOT, /* 1 Sched other, data = nice */
42 RTTEST_SCHEDRT, /* 2 Sched fifo, data = prio */
43 RTTEST_LOCK, /* 3 Lock uninterruptible, data = lockindex */
44 RTTEST_LOCKNOWAIT, /* 4 Lock uninterruptible no wait in wakeup, data = lockindex */
45 RTTEST_LOCKINT, /* 5 Lock interruptible, data = lockindex */
46 RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */
47 RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */
48 RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */
49 /* 9, 10 - reserved for BKL commemoration */
50 RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */
51 RTTEST_RESETEVENT = 98, /* 98 Reset event counter */
52 RTTEST_RESET = 99, /* 99 Reset all pending operations */
53};
54
55static int handle_op(struct test_thread_data *td, int lockwakeup)
56{
57 int i, id, ret = -EINVAL;
58
59	switch (td->opcode) {
60
61 case RTTEST_NOP:
62 return 0;
63
64 case RTTEST_LOCKCONT:
65 td->mutexes[td->opdata] = 1;
66 td->event = atomic_add_return(1, &rttest_event);
67 return 0;
68
69 case RTTEST_RESET:
70 for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) {
71 if (td->mutexes[i] == 4) {
72 rt_mutex_unlock(&mutexes[i]);
73 td->mutexes[i] = 0;
74 }
75 }
76 return 0;
77
78 case RTTEST_RESETEVENT:
79 atomic_set(&rttest_event, 0);
80 return 0;
81
82 default:
83 if (lockwakeup)
84 return ret;
85 }
86
87	switch (td->opcode) {
88
89 case RTTEST_LOCK:
90 case RTTEST_LOCKNOWAIT:
91 id = td->opdata;
92 if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
93 return ret;
94
95 td->mutexes[id] = 1;
96 td->event = atomic_add_return(1, &rttest_event);
97 rt_mutex_lock(&mutexes[id]);
98 td->event = atomic_add_return(1, &rttest_event);
99 td->mutexes[id] = 4;
100 return 0;
101
102 case RTTEST_LOCKINT:
103 case RTTEST_LOCKINTNOWAIT:
104 id = td->opdata;
105 if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
106 return ret;
107
108 td->mutexes[id] = 1;
109 td->event = atomic_add_return(1, &rttest_event);
110 ret = rt_mutex_lock_interruptible(&mutexes[id], 0);
111 td->event = atomic_add_return(1, &rttest_event);
112 td->mutexes[id] = ret ? 0 : 4;
113 return ret ? -EINTR : 0;
114
115 case RTTEST_UNLOCK:
116 id = td->opdata;
117 if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4)
118 return ret;
119
120 td->event = atomic_add_return(1, &rttest_event);
121 rt_mutex_unlock(&mutexes[id]);
122 td->event = atomic_add_return(1, &rttest_event);
123 td->mutexes[id] = 0;
124 return 0;
125
126 default:
127 break;
128 }
129 return ret;
130}
131
132/*
133 * Schedule replacement for rtsem_down(). Only called for threads with
134 * PF_MUTEX_TESTER set.
135 *
136 * This allows us to have finegrained control over the event flow.
137 *
138 */
139void schedule_rt_mutex_test(struct rt_mutex *mutex)
140{
141 int tid, op, dat;
142 struct test_thread_data *td;
143
144	/* We have to look up the task */
145 for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) {
146 if (threads[tid] == current)
147 break;
148 }
149
150 BUG_ON(tid == MAX_RT_TEST_THREADS);
151
152 td = &thread_data[tid];
153
154 op = td->opcode;
155 dat = td->opdata;
156
157 switch (op) {
158 case RTTEST_LOCK:
159 case RTTEST_LOCKINT:
160 case RTTEST_LOCKNOWAIT:
161 case RTTEST_LOCKINTNOWAIT:
162 if (mutex != &mutexes[dat])
163 break;
164
165 if (td->mutexes[dat] != 1)
166 break;
167
168 td->mutexes[dat] = 2;
169 td->event = atomic_add_return(1, &rttest_event);
170 break;
171
172 default:
173 break;
174 }
175
176 schedule();
177
178
179 switch (op) {
180 case RTTEST_LOCK:
181 case RTTEST_LOCKINT:
182 if (mutex != &mutexes[dat])
183 return;
184
185 if (td->mutexes[dat] != 2)
186 return;
187
188 td->mutexes[dat] = 3;
189 td->event = atomic_add_return(1, &rttest_event);
190 break;
191
192 case RTTEST_LOCKNOWAIT:
193 case RTTEST_LOCKINTNOWAIT:
194 if (mutex != &mutexes[dat])
195 return;
196
197 if (td->mutexes[dat] != 2)
198 return;
199
200 td->mutexes[dat] = 1;
201 td->event = atomic_add_return(1, &rttest_event);
202 return;
203
204 default:
205 return;
206 }
207
208 td->opcode = 0;
209
210 for (;;) {
211 set_current_state(TASK_INTERRUPTIBLE);
212
213 if (td->opcode > 0) {
214 int ret;
215
216 set_current_state(TASK_RUNNING);
217 ret = handle_op(td, 1);
218 set_current_state(TASK_INTERRUPTIBLE);
219 if (td->opcode == RTTEST_LOCKCONT)
220 break;
221 td->opcode = ret;
222 }
223
224 /* Wait for the next command to be executed */
225 schedule();
226 }
227
228 /* Restore previous command and data */
229 td->opcode = op;
230 td->opdata = dat;
231}
232
233static int test_func(void *data)
234{
235 struct test_thread_data *td = data;
236 int ret;
237
238 current->flags |= PF_MUTEX_TESTER;
239 set_freezable();
240 allow_signal(SIGHUP);
241
242	for (;;) {
243
244 set_current_state(TASK_INTERRUPTIBLE);
245
246 if (td->opcode > 0) {
247 set_current_state(TASK_RUNNING);
248 ret = handle_op(td, 0);
249 set_current_state(TASK_INTERRUPTIBLE);
250 td->opcode = ret;
251 }
252
253 /* Wait for the next command to be executed */
254 schedule();
255 try_to_freeze();
256
257 if (signal_pending(current))
258 flush_signals(current);
259
260		if (kthread_should_stop())
261 break;
262 }
263 return 0;
264}
265
266/**
267 * sysfs_test_command - interface for test commands
268 * @dev: thread reference
269 * @buf: command for actual step
270 * @count: length of buffer
271 *
272 * command syntax:
273 *
274 * opcode:data
275 */
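/*
 * Illustrative example (not part of this file): writing "2:80" to a
 * thread's "command" attribute switches it to SCHED_FIFO priority 80
 * (RTTEST_SCHEDRT), "3:0" makes it take rt-mutex 0 (RTTEST_LOCK,
 * blocking if the mutex is contended) and "8:0" releases it again
 * (RTTEST_UNLOCK).
 */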
276static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr,
277 const char *buf, size_t count)
278{
279 struct sched_param schedpar;
280 struct test_thread_data *td;
281 char cmdbuf[32];
282 int op, dat, tid, ret;
283
284 td = container_of(dev, struct test_thread_data, dev);
285 tid = td->dev.id;
286
287 /* strings from sysfs write are not 0 terminated! */
288 if (count >= sizeof(cmdbuf))
289 return -EINVAL;
290
291	/* strip off \n: */
292 if (buf[count-1] == '\n')
293 count--;
294 if (count < 1)
295 return -EINVAL;
296
297 memcpy(cmdbuf, buf, count);
298 cmdbuf[count] = 0;
299
300 if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2)
301 return -EINVAL;
302
303 switch (op) {
304 case RTTEST_SCHEDOT:
305 schedpar.sched_priority = 0;
306 ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar);
307 if (ret)
308 return ret;
309 set_user_nice(current, 0);
310 break;
311
312 case RTTEST_SCHEDRT:
313 schedpar.sched_priority = dat;
314 ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar);
315 if (ret)
316 return ret;
317 break;
318
319 case RTTEST_SIGNAL:
320 send_sig(SIGHUP, threads[tid], 0);
321 break;
322
323 default:
324 if (td->opcode > 0)
325 return -EBUSY;
326 td->opdata = dat;
327 td->opcode = op;
328 wake_up_process(threads[tid]);
329 }
330
331 return count;
332}
333
334/**
335 * sysfs_test_status - sysfs interface for rt tester
336 * @dev: thread to query
337 * @buf: char buffer to be filled with thread status info
338 */
339static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr,
340 char *buf)
341{
342 struct test_thread_data *td;
343 struct task_struct *tsk;
344 char *curr = buf;
345 int i;
346
347 td = container_of(dev, struct test_thread_data, dev);
348 tsk = threads[td->dev.id];
349
350 spin_lock(&rttest_lock);
351
352 curr += sprintf(curr,
353 "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
354 td->opcode, td->event, tsk->state,
355 (MAX_RT_PRIO - 1) - tsk->prio,
356 (MAX_RT_PRIO - 1) - tsk->normal_prio,
357 tsk->pi_blocked_on);
358
359	for (i = MAX_RT_TEST_MUTEXES - 1; i >= 0; i--)
360 curr += sprintf(curr, "%d", td->mutexes[i]);
361
362 spin_unlock(&rttest_lock);
363
364 curr += sprintf(curr, ", T: %p, R: %p\n", tsk,
365 mutexes[td->dev.id].owner);
366
367 return curr - buf;
368}
369
370static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL);
371static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command);
372
373static struct bus_type rttest_subsys = {
374 .name = "rttest",
375 .dev_name = "rttest",
376};
377
378static int init_test_thread(int id)
379{
380 thread_data[id].dev.bus = &rttest_subsys;
381 thread_data[id].dev.id = id;
382
383 threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id);
384 if (IS_ERR(threads[id]))
385 return PTR_ERR(threads[id]);
386
387 return device_register(&thread_data[id].dev);
388}
389
390static int init_rttest(void)
391{
392 int ret, i;
393
394 spin_lock_init(&rttest_lock);
395
396 for (i = 0; i < MAX_RT_TEST_MUTEXES; i++)
397 rt_mutex_init(&mutexes[i]);
398
399 ret = subsys_system_register(&rttest_subsys, NULL);
400 if (ret)
401 return ret;
402
403 for (i = 0; i < MAX_RT_TEST_THREADS; i++) {
404 ret = init_test_thread(i);
405 if (ret)
406 break;
407 ret = device_create_file(&thread_data[i].dev, &dev_attr_status);
408 if (ret)
409 break;
410 ret = device_create_file(&thread_data[i].dev, &dev_attr_command);
411 if (ret)
412 break;
413 }
414
415 printk("Initializing RT-Tester: %s\n", ret ? "Failed" : "OK" );
416
417 return ret;
418}
419
420device_initcall(init_rttest);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
new file mode 100644
index 000000000000..0dd6aec1cb6a
--- /dev/null
+++ b/kernel/locking/rtmutex.c
@@ -0,0 +1,1060 @@
1/*
2 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
3 *
4 * started by Ingo Molnar and Thomas Gleixner.
5 *
6 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
9 * Copyright (C) 2006 Esben Nielsen
10 *
11 * See Documentation/rt-mutex-design.txt for details.
12 */
13#include <linux/spinlock.h>
14#include <linux/export.h>
15#include <linux/sched.h>
16#include <linux/sched/rt.h>
17#include <linux/timer.h>
18
19#include "rtmutex_common.h"
20
21/*
22 * lock->owner state tracking:
23 *
24 * lock->owner holds the task_struct pointer of the owner. Bit 0
25 * is used to keep track of the "lock has waiters" state.
26 *
27 * owner bit0
28 * NULL 0 lock is free (fast acquire possible)
29 * NULL 1 lock is free and has waiters and the top waiter
30 * is going to take the lock*
31 * taskpointer 0 lock is held (fast release possible)
32 * taskpointer 1 lock is held and has waiters**
33 *
34 * The fast atomic compare exchange based acquire and release is only
35 * possible when bit 0 of lock->owner is 0.
36 *
37 * (*) It can also be a transitional state while grabbing the lock
38 * with ->wait_lock held. To prevent any fast path cmpxchg to the lock,
39 * we need to set bit0 before looking at the lock, and the owner may be
40 * NULL during this small window, hence this can be a transitional state.
41 *
42 * (**) There is a small time when bit 0 is set but there are no
43 * waiters. This can happen when grabbing the lock in the slow path.
44 * To prevent a cmpxchg of the owner releasing the lock, we need to
45 * set this bit before looking at the lock.
46 */
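
/*
 * Illustrative sketch, not part of this file: decoding the table above.
 * The real rt_mutex_owner() helper in rtmutex_common.h does this
 * masking of bit 0.
 */
static inline struct task_struct *example_rt_mutex_owner(struct rt_mutex *lock)
{
	return (struct task_struct *)
		((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}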
47
48static void
49rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
50{
51 unsigned long val = (unsigned long)owner;
52
53 if (rt_mutex_has_waiters(lock))
54 val |= RT_MUTEX_HAS_WAITERS;
55
56 lock->owner = (struct task_struct *)val;
57}
58
59static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
60{
61 lock->owner = (struct task_struct *)
62 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
63}
64
65static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
66{
67 if (!rt_mutex_has_waiters(lock))
68 clear_rt_mutex_waiters(lock);
69}
70
71/*
72 * We can speed up the acquire/release, if the architecture
73 * supports cmpxchg and if there's no debugging state to be set up
74 */
75#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
76# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
77static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
78{
79 unsigned long owner, *p = (unsigned long *) &lock->owner;
80
81 do {
82 owner = *p;
83 } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
84}
85#else
86# define rt_mutex_cmpxchg(l,c,n) (0)
87static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
88{
89 lock->owner = (struct task_struct *)
90 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
91}
92#endif
93
94/*
95 * Calculate task priority from the waiter list priority
96 *
97 * Return task->normal_prio when the waiter list is empty or when
98 * the waiter is not allowed to do priority boosting
99 */
100int rt_mutex_getprio(struct task_struct *task)
101{
102 if (likely(!task_has_pi_waiters(task)))
103 return task->normal_prio;
104
105 return min(task_top_pi_waiter(task)->pi_list_entry.prio,
106 task->normal_prio);
107}
108
109/*
110 * Adjust the priority of a task, after its pi_waiters got modified.
111 *
112 * This can be both boosting and unboosting. task->pi_lock must be held.
113 */
114static void __rt_mutex_adjust_prio(struct task_struct *task)
115{
116 int prio = rt_mutex_getprio(task);
117
118 if (task->prio != prio)
119 rt_mutex_setprio(task, prio);
120}
121
122/*
123 * Adjust task priority (undo boosting). Called from the exit path of
124 * rt_mutex_slowunlock() and rt_mutex_slowlock().
125 *
126 * (Note: We do this outside of the protection of lock->wait_lock to
127 * allow the lock to be taken while or before we readjust the priority
128 * of task. We do not use the spin_xx_mutex() variants here as we are
129 * outside of the debug path.)
130 */
131static void rt_mutex_adjust_prio(struct task_struct *task)
132{
133 unsigned long flags;
134
135 raw_spin_lock_irqsave(&task->pi_lock, flags);
136 __rt_mutex_adjust_prio(task);
137 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
138}
139
140/*
141 * Max number of times we'll walk the boosting chain:
142 */
143int max_lock_depth = 1024;
144
145/*
146 * Adjust the priority chain. Also used for deadlock detection.
147 * Decreases task's usage by one - may thus free the task.
148 *
149 * @task: the task owning the mutex (owner) for which a chain walk is probably
150 * needed
151 * @deadlock_detect: do we have to carry out deadlock detection?
152 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
153 * things for a task that has just got its priority adjusted, and
154 * is waiting on a mutex)
155 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
156 * its priority to the mutex owner (can be NULL in the case
157 * depicted above or if the top waiter is gone away and we are
158 * actually deboosting the owner)
159 * @top_task: the current top waiter
160 *
161 * Returns 0 or -EDEADLK.
162 */
163static int rt_mutex_adjust_prio_chain(struct task_struct *task,
164 int deadlock_detect,
165 struct rt_mutex *orig_lock,
166 struct rt_mutex_waiter *orig_waiter,
167 struct task_struct *top_task)
168{
169 struct rt_mutex *lock;
170 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
171 int detect_deadlock, ret = 0, depth = 0;
172 unsigned long flags;
173
174 detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
175 deadlock_detect);
176
177 /*
178	 * The (de)boosting is a step-by-step approach with a lot of
179	 * pitfalls. We want this to be preemptible and we want to hold a
180 * maximum of two locks per step. So we have to check
181 * carefully whether things change under us.
182 */
183 again:
184 if (++depth > max_lock_depth) {
185 static int prev_max;
186
187 /*
188 * Print this only once. If the admin changes the limit,
189 * print a new message when reaching the limit again.
190 */
191 if (prev_max != max_lock_depth) {
192 prev_max = max_lock_depth;
193 printk(KERN_WARNING "Maximum lock depth %d reached "
194 "task: %s (%d)\n", max_lock_depth,
195 top_task->comm, task_pid_nr(top_task));
196 }
197 put_task_struct(task);
198
199 return deadlock_detect ? -EDEADLK : 0;
200 }
201 retry:
202 /*
203	 * The task can not go away as we did a get_task_struct() before!
204 */
205 raw_spin_lock_irqsave(&task->pi_lock, flags);
206
207 waiter = task->pi_blocked_on;
208 /*
209 * Check whether the end of the boosting chain has been
210 * reached or the state of the chain has changed while we
211 * dropped the locks.
212 */
213 if (!waiter)
214 goto out_unlock_pi;
215
216 /*
217 * Check the orig_waiter state. After we dropped the locks,
218 * the previous owner of the lock might have released the lock.
219 */
220 if (orig_waiter && !rt_mutex_owner(orig_lock))
221 goto out_unlock_pi;
222
223 /*
224	 * Drop out when the task has no waiters. Note that
225	 * top_waiter can be NULL when we are in the deboosting
226	 * mode!
227 */
228 if (top_waiter && (!task_has_pi_waiters(task) ||
229 top_waiter != task_top_pi_waiter(task)))
230 goto out_unlock_pi;
231
232 /*
233	 * When deadlock detection is off we check whether further
234	 * priority adjustment is necessary.
235 */
236 if (!detect_deadlock && waiter->list_entry.prio == task->prio)
237 goto out_unlock_pi;
238
239 lock = waiter->lock;
240 if (!raw_spin_trylock(&lock->wait_lock)) {
241 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
242 cpu_relax();
243 goto retry;
244 }
245
246 /* Deadlock detection */
247 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
248 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
249 raw_spin_unlock(&lock->wait_lock);
250 ret = deadlock_detect ? -EDEADLK : 0;
251 goto out_unlock_pi;
252 }
253
254 top_waiter = rt_mutex_top_waiter(lock);
255
256 /* Requeue the waiter */
257 plist_del(&waiter->list_entry, &lock->wait_list);
258 waiter->list_entry.prio = task->prio;
259 plist_add(&waiter->list_entry, &lock->wait_list);
260
261 /* Release the task */
262 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
263 if (!rt_mutex_owner(lock)) {
264 /*
265 * If the requeue above changed the top waiter, then we need
266 * to wake the new top waiter up to try to get the lock.
267 */
268
269 if (top_waiter != rt_mutex_top_waiter(lock))
270 wake_up_process(rt_mutex_top_waiter(lock)->task);
271 raw_spin_unlock(&lock->wait_lock);
272 goto out_put_task;
273 }
274 put_task_struct(task);
275
276 /* Grab the next task */
277 task = rt_mutex_owner(lock);
278 get_task_struct(task);
279 raw_spin_lock_irqsave(&task->pi_lock, flags);
280
281 if (waiter == rt_mutex_top_waiter(lock)) {
282 /* Boost the owner */
283 plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
284 waiter->pi_list_entry.prio = waiter->list_entry.prio;
285 plist_add(&waiter->pi_list_entry, &task->pi_waiters);
286 __rt_mutex_adjust_prio(task);
287
288 } else if (top_waiter == waiter) {
289 /* Deboost the owner */
290 plist_del(&waiter->pi_list_entry, &task->pi_waiters);
291 waiter = rt_mutex_top_waiter(lock);
292 waiter->pi_list_entry.prio = waiter->list_entry.prio;
293 plist_add(&waiter->pi_list_entry, &task->pi_waiters);
294 __rt_mutex_adjust_prio(task);
295 }
296
297 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
298
299 top_waiter = rt_mutex_top_waiter(lock);
300 raw_spin_unlock(&lock->wait_lock);
301
302 if (!detect_deadlock && waiter != top_waiter)
303 goto out_put_task;
304
305 goto again;
306
307 out_unlock_pi:
308 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
309 out_put_task:
310 put_task_struct(task);
311
312 return ret;
313}
314
315/*
316 * Try to take an rt-mutex
317 *
318 * Must be called with lock->wait_lock held.
319 *
320 * @lock: the lock to be acquired.
321 * @task: the task which wants to acquire the lock
322 * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
323 */
324static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
325 struct rt_mutex_waiter *waiter)
326{
327 /*
328 * We have to be careful here if the atomic speedups are
329 * enabled, such that, when
330 * - no other waiter is on the lock
331 * - the lock has been released since we did the cmpxchg
332 * the lock can be released or taken while we are doing the
333 * checks and marking the lock with RT_MUTEX_HAS_WAITERS.
334 *
335 * The atomic acquire/release aware variant of
336 * mark_rt_mutex_waiters uses a cmpxchg loop. After setting
337 * the WAITERS bit, the atomic release / acquire can not
338 * happen anymore and lock->wait_lock protects us from the
339 * non-atomic case.
340 *
341 * Note, that this might set lock->owner =
342 * RT_MUTEX_HAS_WAITERS in the case the lock is not contended
343 * any more. This is fixed up when we take the ownership.
344 * This is the transitional state explained at the top of this file.
345 */
346 mark_rt_mutex_waiters(lock);
347
348 if (rt_mutex_owner(lock))
349 return 0;
350
351 /*
352	 * The task will get the lock if one of these conditions holds:
353	 * 1) there is no waiter
354	 * 2) it has a higher priority than the waiters
355	 * 3) it is the top waiter
356 */
357 if (rt_mutex_has_waiters(lock)) {
358 if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
359 if (!waiter || waiter != rt_mutex_top_waiter(lock))
360 return 0;
361 }
362 }
363
364 if (waiter || rt_mutex_has_waiters(lock)) {
365 unsigned long flags;
366 struct rt_mutex_waiter *top;
367
368 raw_spin_lock_irqsave(&task->pi_lock, flags);
369
370 /* remove the queued waiter. */
371 if (waiter) {
372 plist_del(&waiter->list_entry, &lock->wait_list);
373 task->pi_blocked_on = NULL;
374 }
375
376 /*
377 * We have to enqueue the top waiter(if it exists) into
378 * task->pi_waiters list.
379 */
380 if (rt_mutex_has_waiters(lock)) {
381 top = rt_mutex_top_waiter(lock);
382 top->pi_list_entry.prio = top->list_entry.prio;
383 plist_add(&top->pi_list_entry, &task->pi_waiters);
384 }
385 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
386 }
387
388 /* We got the lock. */
389 debug_rt_mutex_lock(lock);
390
391 rt_mutex_set_owner(lock, task);
392
393 rt_mutex_deadlock_account_lock(lock, task);
394
395 return 1;
396}
397
398/*
399 * Task blocks on lock.
400 *
401 * Prepare waiter and propagate pi chain
402 *
403 * This must be called with lock->wait_lock held.
404 */
405static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
406 struct rt_mutex_waiter *waiter,
407 struct task_struct *task,
408 int detect_deadlock)
409{
410 struct task_struct *owner = rt_mutex_owner(lock);
411 struct rt_mutex_waiter *top_waiter = waiter;
412 unsigned long flags;
413 int chain_walk = 0, res;
414
415 raw_spin_lock_irqsave(&task->pi_lock, flags);
416 __rt_mutex_adjust_prio(task);
417 waiter->task = task;
418 waiter->lock = lock;
419 plist_node_init(&waiter->list_entry, task->prio);
420 plist_node_init(&waiter->pi_list_entry, task->prio);
421
422 /* Get the top priority waiter on the lock */
423 if (rt_mutex_has_waiters(lock))
424 top_waiter = rt_mutex_top_waiter(lock);
425 plist_add(&waiter->list_entry, &lock->wait_list);
426
427 task->pi_blocked_on = waiter;
428
429 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
430
431 if (!owner)
432 return 0;
433
434 if (waiter == rt_mutex_top_waiter(lock)) {
435 raw_spin_lock_irqsave(&owner->pi_lock, flags);
436 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
437 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
438
439 __rt_mutex_adjust_prio(owner);
440 if (owner->pi_blocked_on)
441 chain_walk = 1;
442 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
443 }
444 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
445 chain_walk = 1;
446
447 if (!chain_walk)
448 return 0;
449
450 /*
451 * The owner can't disappear while holding a lock,
452 * so the owner struct is protected by wait_lock.
453 * Gets dropped in rt_mutex_adjust_prio_chain()!
454 */
455 get_task_struct(owner);
456
457 raw_spin_unlock(&lock->wait_lock);
458
459 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
460 task);
461
462 raw_spin_lock(&lock->wait_lock);
463
464 return res;
465}
466
467/*
468 * Wake up the next waiter on the lock.
469 *
470 * Remove the top waiter from the current task's waiter list and wake it up.
471 *
472 * Called with lock->wait_lock held.
473 */
474static void wakeup_next_waiter(struct rt_mutex *lock)
475{
476 struct rt_mutex_waiter *waiter;
477 unsigned long flags;
478
479 raw_spin_lock_irqsave(&current->pi_lock, flags);
480
481 waiter = rt_mutex_top_waiter(lock);
482
483 /*
484 * Remove it from current->pi_waiters. We do not adjust a
485 * possible priority boost right now. We execute wakeup in the
486 * boosted mode and go back to normal after releasing
487 * lock->wait_lock.
488 */
489 plist_del(&waiter->pi_list_entry, &current->pi_waiters);
490
491 rt_mutex_set_owner(lock, NULL);
492
493 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
494
495 wake_up_process(waiter->task);
496}
497
498/*
499 * Remove a waiter from a lock and give up
500 *
501 * Must be called with lock->wait_lock held and
502 * have just failed to try_to_take_rt_mutex().
503 */
504static void remove_waiter(struct rt_mutex *lock,
505 struct rt_mutex_waiter *waiter)
506{
507 int first = (waiter == rt_mutex_top_waiter(lock));
508 struct task_struct *owner = rt_mutex_owner(lock);
509 unsigned long flags;
510 int chain_walk = 0;
511
512 raw_spin_lock_irqsave(&current->pi_lock, flags);
513 plist_del(&waiter->list_entry, &lock->wait_list);
514 current->pi_blocked_on = NULL;
515 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
516
517 if (!owner)
518 return;
519
520 if (first) {
521
522 raw_spin_lock_irqsave(&owner->pi_lock, flags);
523
524 plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
525
526 if (rt_mutex_has_waiters(lock)) {
527 struct rt_mutex_waiter *next;
528
529 next = rt_mutex_top_waiter(lock);
530 plist_add(&next->pi_list_entry, &owner->pi_waiters);
531 }
532 __rt_mutex_adjust_prio(owner);
533
534 if (owner->pi_blocked_on)
535 chain_walk = 1;
536
537 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
538 }
539
540 WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
541
542 if (!chain_walk)
543 return;
544
545 /* gets dropped in rt_mutex_adjust_prio_chain()! */
546 get_task_struct(owner);
547
548 raw_spin_unlock(&lock->wait_lock);
549
550 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
551
552 raw_spin_lock(&lock->wait_lock);
553}
554
555/*
556 * Recheck the pi chain, in case we got a priority setting
557 *
558 * Called from sched_setscheduler
559 */
560void rt_mutex_adjust_pi(struct task_struct *task)
561{
562 struct rt_mutex_waiter *waiter;
563 unsigned long flags;
564
565 raw_spin_lock_irqsave(&task->pi_lock, flags);
566
567 waiter = task->pi_blocked_on;
568 if (!waiter || waiter->list_entry.prio == task->prio) {
569 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
570 return;
571 }
572
573 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
574
575 /* gets dropped in rt_mutex_adjust_prio_chain()! */
576 get_task_struct(task);
577 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
578}
579
580/**
581 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
582 * @lock: the rt_mutex to take
583 * @state: the state the task should block in (TASK_INTERRUPTIBLE
584 * or TASK_UNINTERRUPTIBLE)
585 * @timeout: the pre-initialized and started timer, or NULL for none
586 * @waiter: the pre-initialized rt_mutex_waiter
587 *
588 * lock->wait_lock must be held by the caller.
589 */
590static int __sched
591__rt_mutex_slowlock(struct rt_mutex *lock, int state,
592 struct hrtimer_sleeper *timeout,
593 struct rt_mutex_waiter *waiter)
594{
595 int ret = 0;
596
597 for (;;) {
598 /* Try to acquire the lock: */
599 if (try_to_take_rt_mutex(lock, current, waiter))
600 break;
601
602 /*
603 * TASK_INTERRUPTIBLE checks for signals and
604 * timeout. Ignored otherwise.
605 */
606 if (unlikely(state == TASK_INTERRUPTIBLE)) {
607 /* Signal pending? */
608 if (signal_pending(current))
609 ret = -EINTR;
610 if (timeout && !timeout->task)
611 ret = -ETIMEDOUT;
612 if (ret)
613 break;
614 }
615
616 raw_spin_unlock(&lock->wait_lock);
617
618 debug_rt_mutex_print_deadlock(waiter);
619
620 schedule_rt_mutex(lock);
621
622 raw_spin_lock(&lock->wait_lock);
623 set_current_state(state);
624 }
625
626 return ret;
627}
628
629/*
630 * Slow path lock function:
631 */
632static int __sched
633rt_mutex_slowlock(struct rt_mutex *lock, int state,
634 struct hrtimer_sleeper *timeout,
635 int detect_deadlock)
636{
637 struct rt_mutex_waiter waiter;
638 int ret = 0;
639
640 debug_rt_mutex_init_waiter(&waiter);
641
642 raw_spin_lock(&lock->wait_lock);
643
644 /* Try to acquire the lock again: */
645 if (try_to_take_rt_mutex(lock, current, NULL)) {
646 raw_spin_unlock(&lock->wait_lock);
647 return 0;
648 }
649
650 set_current_state(state);
651
652 /* Setup the timer, when timeout != NULL */
653 if (unlikely(timeout)) {
654 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
655 if (!hrtimer_active(&timeout->timer))
656 timeout->task = NULL;
657 }
658
659 ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
660
661 if (likely(!ret))
662 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
663
664 set_current_state(TASK_RUNNING);
665
666 if (unlikely(ret))
667 remove_waiter(lock, &waiter);
668
669 /*
670 * try_to_take_rt_mutex() sets the waiter bit
671 * unconditionally. We might have to fix that up.
672 */
673 fixup_rt_mutex_waiters(lock);
674
675 raw_spin_unlock(&lock->wait_lock);
676
677 /* Remove pending timer: */
678 if (unlikely(timeout))
679 hrtimer_cancel(&timeout->timer);
680
681 debug_rt_mutex_free_waiter(&waiter);
682
683 return ret;
684}
685
686/*
687 * Slow path try-lock function:
688 */
689static inline int
690rt_mutex_slowtrylock(struct rt_mutex *lock)
691{
692 int ret = 0;
693
694 raw_spin_lock(&lock->wait_lock);
695
696 if (likely(rt_mutex_owner(lock) != current)) {
697
698 ret = try_to_take_rt_mutex(lock, current, NULL);
699 /*
700 * try_to_take_rt_mutex() sets the lock waiters
701 * bit unconditionally. Clean this up.
702 */
703 fixup_rt_mutex_waiters(lock);
704 }
705
706 raw_spin_unlock(&lock->wait_lock);
707
708 return ret;
709}
710
711/*
712 * Slow path to release a rt-mutex:
713 */
714static void __sched
715rt_mutex_slowunlock(struct rt_mutex *lock)
716{
717 raw_spin_lock(&lock->wait_lock);
718
719 debug_rt_mutex_unlock(lock);
720
721 rt_mutex_deadlock_account_unlock(current);
722
723 if (!rt_mutex_has_waiters(lock)) {
724 lock->owner = NULL;
725 raw_spin_unlock(&lock->wait_lock);
726 return;
727 }
728
729 wakeup_next_waiter(lock);
730
731 raw_spin_unlock(&lock->wait_lock);
732
733 /* Undo pi boosting if necessary: */
734 rt_mutex_adjust_prio(current);
735}
736
737/*
738 * debug aware fast / slowpath lock,trylock,unlock
739 *
740 * The atomic acquire/release ops are compiled away, when either the
741 * architecture does not support cmpxchg or when debugging is enabled.
742 */
743static inline int
744rt_mutex_fastlock(struct rt_mutex *lock, int state,
745 int detect_deadlock,
746 int (*slowfn)(struct rt_mutex *lock, int state,
747 struct hrtimer_sleeper *timeout,
748 int detect_deadlock))
749{
750 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
751 rt_mutex_deadlock_account_lock(lock, current);
752 return 0;
753 } else
754 return slowfn(lock, state, NULL, detect_deadlock);
755}
756
757static inline int
758rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
759 struct hrtimer_sleeper *timeout, int detect_deadlock,
760 int (*slowfn)(struct rt_mutex *lock, int state,
761 struct hrtimer_sleeper *timeout,
762 int detect_deadlock))
763{
764 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
765 rt_mutex_deadlock_account_lock(lock, current);
766 return 0;
767 } else
768 return slowfn(lock, state, timeout, detect_deadlock);
769}
770
771static inline int
772rt_mutex_fasttrylock(struct rt_mutex *lock,
773 int (*slowfn)(struct rt_mutex *lock))
774{
775 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
776 rt_mutex_deadlock_account_lock(lock, current);
777 return 1;
778 }
779 return slowfn(lock);
780}
781
782static inline void
783rt_mutex_fastunlock(struct rt_mutex *lock,
784 void (*slowfn)(struct rt_mutex *lock))
785{
786 if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
787 rt_mutex_deadlock_account_unlock(current);
788 else
789 slowfn(lock);
790}
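Editor's note: the wrappers above implement the usual fast/slow split — an uncontended lock or unlock is a single cmpxchg on lock->owner, and only contention or deadlock detection falls through to the slow functions. As a hedged sketch (the actual definition lives earlier in this file and is replaced by a constant 0 when the architecture lacks cmpxchg or CONFIG_DEBUG_RT_MUTEXES is enabled, so every call then takes the slow path), the fast-path primitive boils down to:

	/* Sketch of the fast-path test assumed by the wrappers above. */
	#define rt_mutex_cmpxchg(l, c, n)	(cmpxchg(&(l)->owner, c, n) == c)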
791
792/**
793 * rt_mutex_lock - lock a rt_mutex
794 *
795 * @lock: the rt_mutex to be locked
796 */
797void __sched rt_mutex_lock(struct rt_mutex *lock)
798{
799 might_sleep();
800
801 rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
802}
803EXPORT_SYMBOL_GPL(rt_mutex_lock);
804
805/**
806 * rt_mutex_lock_interruptible - lock a rt_mutex interruptibly
807 *
808 * @lock: the rt_mutex to be locked
809 * @detect_deadlock: deadlock detection on/off
810 *
811 * Returns:
812 * 0 on success
813 * -EINTR when interrupted by a signal
814 * -EDEADLK when the lock would deadlock (when deadlock detection is on)
815 */
816int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
817 int detect_deadlock)
818{
819 might_sleep();
820
821 return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
822 detect_deadlock, rt_mutex_slowlock);
823}
824EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
825
826/**
827 * rt_mutex_timed_lock - lock a rt_mutex interruptibly,
828 *			with the timeout structure provided
829 *			by the caller
830 *
831 * @lock: the rt_mutex to be locked
832 * @timeout: timeout structure or NULL (no timeout)
833 * @detect_deadlock: deadlock detection on/off
834 *
835 * Returns:
836 * 0 on success
837 * -EINTR when interrupted by a signal
838 * -ETIMEDOUT when the timeout expired
839 * -EDEADLK when the lock would deadlock (when deadlock detection is on)
840 */
841int
842rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
843 int detect_deadlock)
844{
845 might_sleep();
846
847 return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
848 detect_deadlock, rt_mutex_slowlock);
849}
850EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
851
852/**
853 * rt_mutex_trylock - try to lock a rt_mutex
854 *
855 * @lock: the rt_mutex to be locked
856 *
857 * Returns 1 on success and 0 on contention
858 */
859int __sched rt_mutex_trylock(struct rt_mutex *lock)
860{
861 return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
862}
863EXPORT_SYMBOL_GPL(rt_mutex_trylock);
864
865/**
866 * rt_mutex_unlock - unlock a rt_mutex
867 *
868 * @lock: the rt_mutex to be unlocked
869 */
870void __sched rt_mutex_unlock(struct rt_mutex *lock)
871{
872 rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
873}
874EXPORT_SYMBOL_GPL(rt_mutex_unlock);
875
876/**
877 * rt_mutex_destroy - mark a mutex unusable
878 * @lock: the mutex to be destroyed
879 *
880 * This function marks the mutex uninitialized, and any subsequent
881 * use of the mutex is forbidden. The mutex must not be locked when
882 * this function is called.
883 */
884void rt_mutex_destroy(struct rt_mutex *lock)
885{
886 WARN_ON(rt_mutex_is_locked(lock));
887#ifdef CONFIG_DEBUG_RT_MUTEXES
888 lock->magic = NULL;
889#endif
890}
891
892EXPORT_SYMBOL_GPL(rt_mutex_destroy);
893
894/**
895 * __rt_mutex_init - initialize the rt lock
896 *
897 * @lock: the rt lock to be initialized
898 *
899 * Initialize the rt lock to unlocked state.
900 *
901 * Initializing a locked rt lock is not allowed
902 */
903void __rt_mutex_init(struct rt_mutex *lock, const char *name)
904{
905 lock->owner = NULL;
906 raw_spin_lock_init(&lock->wait_lock);
907 plist_head_init(&lock->wait_list);
908
909 debug_rt_mutex_init(lock, name);
910}
911EXPORT_SYMBOL_GPL(__rt_mutex_init);
912
913/**
914 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
915 * proxy owner
916 *
917 * @lock: the rt_mutex to be locked
918 * @proxy_owner:the task to set as owner
919 *
920 * No locking. The caller must serialize access itself.
921 * Special API call for PI-futex support
922 */
923void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
924 struct task_struct *proxy_owner)
925{
926 __rt_mutex_init(lock, NULL);
927 debug_rt_mutex_proxy_lock(lock, proxy_owner);
928 rt_mutex_set_owner(lock, proxy_owner);
929 rt_mutex_deadlock_account_lock(lock, proxy_owner);
930}
931
932/**
933 * rt_mutex_proxy_unlock - release a lock on behalf of owner
934 *
935 * @lock: the rt_mutex to be locked
936 *
937 * No locking. The caller must serialize access itself.
938 * Special API call for PI-futex support
939 */
940void rt_mutex_proxy_unlock(struct rt_mutex *lock,
941 struct task_struct *proxy_owner)
942{
943 debug_rt_mutex_proxy_unlock(lock);
944 rt_mutex_set_owner(lock, NULL);
945 rt_mutex_deadlock_account_unlock(proxy_owner);
946}
947
948/**
949 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
950 * @lock: the rt_mutex to take
951 * @waiter: the pre-initialized rt_mutex_waiter
952 * @task: the task to prepare
953 * @detect_deadlock: perform deadlock detection (1) or not (0)
954 *
955 * Returns:
956 * 0 - task blocked on lock
957 * 1 - acquired the lock for task, caller should wake it up
958 * <0 - error
959 *
960 * Special API call for FUTEX_REQUEUE_PI support.
961 */
962int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
963 struct rt_mutex_waiter *waiter,
964 struct task_struct *task, int detect_deadlock)
965{
966 int ret;
967
968 raw_spin_lock(&lock->wait_lock);
969
970 if (try_to_take_rt_mutex(lock, task, NULL)) {
971 raw_spin_unlock(&lock->wait_lock);
972 return 1;
973 }
974
975 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
976
977 if (ret && !rt_mutex_owner(lock)) {
978 /*
979 * Reset the return value. We might have
980 * returned with -EDEADLK and the owner
981 * released the lock while we were walking the
982 * pi chain. Let the waiter sort it out.
983 */
984 ret = 0;
985 }
986
987 if (unlikely(ret))
988 remove_waiter(lock, waiter);
989
990 raw_spin_unlock(&lock->wait_lock);
991
992 debug_rt_mutex_print_deadlock(waiter);
993
994 return ret;
995}
996
997/**
998 * rt_mutex_next_owner - return the next owner of the lock
999 *
1000 * @lock: the rt lock query
1001 *
1002 * Returns the next owner of the lock or NULL
1003 *
1004 * Caller has to serialize against other accessors to the lock
1005 * itself.
1006 *
1007 * Special API call for PI-futex support
1008 */
1009struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
1010{
1011 if (!rt_mutex_has_waiters(lock))
1012 return NULL;
1013
1014 return rt_mutex_top_waiter(lock)->task;
1015}
1016
1017/**
1018 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
1019 * @lock: the rt_mutex we were woken on
1020 * @to: the timeout, null if none. hrtimer should already have
1021 * been started.
1022 * @waiter: the pre-initialized rt_mutex_waiter
1023 * @detect_deadlock: perform deadlock detection (1) or not (0)
1024 *
1025 * Complete the lock acquisition started on our behalf by another thread.
1026 *
1027 * Returns:
1028 * 0 - success
1029 * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
1030 *
1031 * Special API call for PI-futex requeue support
1032 */
1033int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1034 struct hrtimer_sleeper *to,
1035 struct rt_mutex_waiter *waiter,
1036 int detect_deadlock)
1037{
1038 int ret;
1039
1040 raw_spin_lock(&lock->wait_lock);
1041
1042 set_current_state(TASK_INTERRUPTIBLE);
1043
1044 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1045
1046 set_current_state(TASK_RUNNING);
1047
1048 if (unlikely(ret))
1049 remove_waiter(lock, waiter);
1050
1051 /*
1052 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
1053 * have to fix that up.
1054 */
1055 fixup_rt_mutex_waiters(lock);
1056
1057 raw_spin_unlock(&lock->wait_lock);
1058
1059 return ret;
1060}
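Editor's note: taken together, the exported entry points above form the public rt_mutex API. A minimal usage sketch, assuming the DEFINE_RT_MUTEX() static initializer from include/linux/rtmutex.h (the example names are hypothetical):

#include <linux/rtmutex.h>

static DEFINE_RT_MUTEX(example_rtm);		/* statically initialized, unlocked */

static void example_critical_section(void)
{
	rt_mutex_lock(&example_rtm);		/* may sleep; boosts a lower-prio owner */
	/* ... exclusive work ... */
	rt_mutex_unlock(&example_rtm);		/* drops any priority boost */

	if (rt_mutex_trylock(&example_rtm)) {	/* 1 on success, 0 on contention */
		/* ... */
		rt_mutex_unlock(&example_rtm);
	}
}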
diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h
new file mode 100644
index 000000000000..a1a1dd06421d
--- /dev/null
+++ b/kernel/locking/rtmutex.h
@@ -0,0 +1,26 @@
1/*
2 * RT-Mutexes: blocking mutual exclusion locks with PI support
3 *
4 * started by Ingo Molnar and Thomas Gleixner:
5 *
6 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8 *
9 * This file contains macros used solely by rtmutex.c.
10 * Non-debug version.
11 */
12
13#define rt_mutex_deadlock_check(l) (0)
14#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
15#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
16#define debug_rt_mutex_init_waiter(w) do { } while (0)
17#define debug_rt_mutex_free_waiter(w) do { } while (0)
18#define debug_rt_mutex_lock(l) do { } while (0)
19#define debug_rt_mutex_proxy_lock(l,p) do { } while (0)
20#define debug_rt_mutex_proxy_unlock(l) do { } while (0)
21#define debug_rt_mutex_unlock(l) do { } while (0)
22#define debug_rt_mutex_init(m, n) do { } while (0)
23#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0)
24#define debug_rt_mutex_print_deadlock(w) do { } while (0)
25#define debug_rt_mutex_detect_deadlock(w,d) (d)
26#define debug_rt_mutex_reset_waiter(w) do { } while (0)
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
new file mode 100644
index 000000000000..53a66c85261b
--- /dev/null
+++ b/kernel/locking/rtmutex_common.h
@@ -0,0 +1,126 @@
1/*
2 * RT Mutexes: blocking mutual exclusion locks with PI support
3 *
4 * started by Ingo Molnar and Thomas Gleixner:
5 *
6 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8 *
9 * This file contains the private data structure and API definitions.
10 */
11
12#ifndef __KERNEL_RTMUTEX_COMMON_H
13#define __KERNEL_RTMUTEX_COMMON_H
14
15#include <linux/rtmutex.h>
16
17/*
18 * The in-kernel rtmutex tester is independent of rtmutex debugging. We
19 * call schedule_rt_mutex_test() instead of schedule() for the tasks which
20 * belong to the tester. That way we can delay the wakeup path of those
21 * threads to provoke lock stealing and testing of complex boosting scenarios.
22 */
23#ifdef CONFIG_RT_MUTEX_TESTER
24
25extern void schedule_rt_mutex_test(struct rt_mutex *lock);
26
27#define schedule_rt_mutex(_lock) \
28 do { \
29 if (!(current->flags & PF_MUTEX_TESTER)) \
30 schedule(); \
31 else \
32 schedule_rt_mutex_test(_lock); \
33 } while (0)
34
35#else
36# define schedule_rt_mutex(_lock) schedule()
37#endif
38
39/*
40 * This is the control structure for tasks blocked on a rt_mutex,
41 * which is allocated on the kernel stack of the blocked task.
42 *
43 * @list_entry: pi node to enqueue into the mutex waiters list
44 * @pi_list_entry: pi node to enqueue into the mutex owner waiters list
45 * @task: task reference to the blocked task
46 */
47struct rt_mutex_waiter {
48 struct plist_node list_entry;
49 struct plist_node pi_list_entry;
50 struct task_struct *task;
51 struct rt_mutex *lock;
52#ifdef CONFIG_DEBUG_RT_MUTEXES
53 unsigned long ip;
54 struct pid *deadlock_task_pid;
55 struct rt_mutex *deadlock_lock;
56#endif
57};
58
59/*
60 * Various helpers to access the waiters-plist:
61 */
62static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
63{
64 return !plist_head_empty(&lock->wait_list);
65}
66
67static inline struct rt_mutex_waiter *
68rt_mutex_top_waiter(struct rt_mutex *lock)
69{
70 struct rt_mutex_waiter *w;
71
72 w = plist_first_entry(&lock->wait_list, struct rt_mutex_waiter,
73 list_entry);
74 BUG_ON(w->lock != lock);
75
76 return w;
77}
78
79static inline int task_has_pi_waiters(struct task_struct *p)
80{
81 return !plist_head_empty(&p->pi_waiters);
82}
83
84static inline struct rt_mutex_waiter *
85task_top_pi_waiter(struct task_struct *p)
86{
87 return plist_first_entry(&p->pi_waiters, struct rt_mutex_waiter,
88 pi_list_entry);
89}
90
91/*
92 * lock->owner state tracking:
93 */
94#define RT_MUTEX_HAS_WAITERS 1UL
95#define RT_MUTEX_OWNER_MASKALL 1UL
96
97static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
98{
99 return (struct task_struct *)
100 ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
101}
102
103/*
104 * PI-futex support (proxy locking functions, etc.):
105 */
106extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
107extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
108 struct task_struct *proxy_owner);
109extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
110 struct task_struct *proxy_owner);
111extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
112 struct rt_mutex_waiter *waiter,
113 struct task_struct *task,
114 int detect_deadlock);
115extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
116 struct hrtimer_sleeper *to,
117 struct rt_mutex_waiter *waiter,
118 int detect_deadlock);
119
120#ifdef CONFIG_DEBUG_RT_MUTEXES
121# include "rtmutex-debug.h"
122#else
123# include "rtmutex.h"
124#endif
125
126#endif
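Editor's note: the owner-state tracking above hides the "has waiters" flag in the low bit of lock->owner, which is why rt_mutex_owner() masks with RT_MUTEX_OWNER_MASKALL. A hypothetical helper (illustration only, not a kernel function) makes the encoding explicit:

/* Illustration only: read the waiters flag stored in the owner pointer. */
static inline int example_owner_has_waiters(struct rt_mutex *lock)
{
	return ((unsigned long)lock->owner & RT_MUTEX_HAS_WAITERS) != 0;
}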
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
new file mode 100644
index 000000000000..9be8a9144978
--- /dev/null
+++ b/kernel/locking/rwsem-spinlock.c
@@ -0,0 +1,296 @@
1/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
2 * generic spinlock implementation
3 *
4 * Copyright (c) 2001 David Howells (dhowells@redhat.com).
5 * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
6 * - Derived also from comments by Linus
7 */
8#include <linux/rwsem.h>
9#include <linux/sched.h>
10#include <linux/export.h>
11
12enum rwsem_waiter_type {
13 RWSEM_WAITING_FOR_WRITE,
14 RWSEM_WAITING_FOR_READ
15};
16
17struct rwsem_waiter {
18 struct list_head list;
19 struct task_struct *task;
20 enum rwsem_waiter_type type;
21};
22
23int rwsem_is_locked(struct rw_semaphore *sem)
24{
25 int ret = 1;
26 unsigned long flags;
27
28 if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
29 ret = (sem->activity != 0);
30 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
31 }
32 return ret;
33}
34EXPORT_SYMBOL(rwsem_is_locked);
35
36/*
37 * initialise the semaphore
38 */
39void __init_rwsem(struct rw_semaphore *sem, const char *name,
40 struct lock_class_key *key)
41{
42#ifdef CONFIG_DEBUG_LOCK_ALLOC
43 /*
44 * Make sure we are not reinitializing a held semaphore:
45 */
46 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
47 lockdep_init_map(&sem->dep_map, name, key, 0);
48#endif
49 sem->activity = 0;
50 raw_spin_lock_init(&sem->wait_lock);
51 INIT_LIST_HEAD(&sem->wait_list);
52}
53EXPORT_SYMBOL(__init_rwsem);
54
55/*
56 * handle the lock release when there are processes blocked on it that can now run
57 * - if we come here, then:
58 * - the 'active count' _reached_ zero
59 * - the 'waiting count' is non-zero
60 * - the spinlock must be held by the caller
61 * - woken process blocks are discarded from the list after having task zeroed
62 * - writers are only woken if wakewrite is non-zero
63 */
64static inline struct rw_semaphore *
65__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
66{
67 struct rwsem_waiter *waiter;
68 struct task_struct *tsk;
69 int woken;
70
71 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
72
73 if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
74 if (wakewrite)
75 /* Wake up a writer. Note that we do not grant it the
76 * lock - it will have to acquire it when it runs. */
77 wake_up_process(waiter->task);
78 goto out;
79 }
80
81 /* grant an infinite number of read locks to the front of the queue */
82 woken = 0;
83 do {
84 struct list_head *next = waiter->list.next;
85
86 list_del(&waiter->list);
87 tsk = waiter->task;
88 smp_mb();
89 waiter->task = NULL;
90 wake_up_process(tsk);
91 put_task_struct(tsk);
92 woken++;
93 if (next == &sem->wait_list)
94 break;
95 waiter = list_entry(next, struct rwsem_waiter, list);
96 } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
97
98 sem->activity += woken;
99
100 out:
101 return sem;
102}
103
104/*
105 * wake a single writer
106 */
107static inline struct rw_semaphore *
108__rwsem_wake_one_writer(struct rw_semaphore *sem)
109{
110 struct rwsem_waiter *waiter;
111
112 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
113 wake_up_process(waiter->task);
114
115 return sem;
116}
117
118/*
119 * get a read lock on the semaphore
120 */
121void __sched __down_read(struct rw_semaphore *sem)
122{
123 struct rwsem_waiter waiter;
124 struct task_struct *tsk;
125 unsigned long flags;
126
127 raw_spin_lock_irqsave(&sem->wait_lock, flags);
128
129 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
130 /* granted */
131 sem->activity++;
132 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
133 goto out;
134 }
135
136 tsk = current;
137 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
138
139 /* set up my own style of waitqueue */
140 waiter.task = tsk;
141 waiter.type = RWSEM_WAITING_FOR_READ;
142 get_task_struct(tsk);
143
144 list_add_tail(&waiter.list, &sem->wait_list);
145
146 /* we don't need to touch the semaphore struct anymore */
147 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
148
149 /* wait to be given the lock */
150 for (;;) {
151 if (!waiter.task)
152 break;
153 schedule();
154 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
155 }
156
157 tsk->state = TASK_RUNNING;
158 out:
159 ;
160}
161
162/*
163 * trylock for reading -- returns 1 if successful, 0 if contention
164 */
165int __down_read_trylock(struct rw_semaphore *sem)
166{
167 unsigned long flags;
168 int ret = 0;
169
170
171 raw_spin_lock_irqsave(&sem->wait_lock, flags);
172
173 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
174 /* granted */
175 sem->activity++;
176 ret = 1;
177 }
178
179 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
180
181 return ret;
182}
183
184/*
185 * get a write lock on the semaphore
186 */
187void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
188{
189 struct rwsem_waiter waiter;
190 struct task_struct *tsk;
191 unsigned long flags;
192
193 raw_spin_lock_irqsave(&sem->wait_lock, flags);
194
195 /* set up my own style of waitqueue */
196 tsk = current;
197 waiter.task = tsk;
198 waiter.type = RWSEM_WAITING_FOR_WRITE;
199 list_add_tail(&waiter.list, &sem->wait_list);
200
201 /* wait for someone to release the lock */
202 for (;;) {
203 /*
204			 * This is the key to supporting write lock stealing: it allows
205			 * the task already on a CPU to take the lock promptly, rather
206			 * than go to sleep and wait for the system (or someone at the
207			 * head of the wait list) to wake it up.
208 */
209 if (sem->activity == 0)
210 break;
211 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
212 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
213 schedule();
214 raw_spin_lock_irqsave(&sem->wait_lock, flags);
215 }
216 /* got the lock */
217 sem->activity = -1;
218 list_del(&waiter.list);
219
220 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
221}
222
223void __sched __down_write(struct rw_semaphore *sem)
224{
225 __down_write_nested(sem, 0);
226}
227
228/*
229 * trylock for writing -- returns 1 if successful, 0 if contention
230 */
231int __down_write_trylock(struct rw_semaphore *sem)
232{
233 unsigned long flags;
234 int ret = 0;
235
236 raw_spin_lock_irqsave(&sem->wait_lock, flags);
237
238 if (sem->activity == 0) {
239 /* got the lock */
240 sem->activity = -1;
241 ret = 1;
242 }
243
244 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
245
246 return ret;
247}
248
249/*
250 * release a read lock on the semaphore
251 */
252void __up_read(struct rw_semaphore *sem)
253{
254 unsigned long flags;
255
256 raw_spin_lock_irqsave(&sem->wait_lock, flags);
257
258 if (--sem->activity == 0 && !list_empty(&sem->wait_list))
259 sem = __rwsem_wake_one_writer(sem);
260
261 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
262}
263
264/*
265 * release a write lock on the semaphore
266 */
267void __up_write(struct rw_semaphore *sem)
268{
269 unsigned long flags;
270
271 raw_spin_lock_irqsave(&sem->wait_lock, flags);
272
273 sem->activity = 0;
274 if (!list_empty(&sem->wait_list))
275 sem = __rwsem_do_wake(sem, 1);
276
277 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
278}
279
280/*
281 * downgrade a write lock into a read lock
282 * - just wake up any readers at the front of the queue
283 */
284void __downgrade_write(struct rw_semaphore *sem)
285{
286 unsigned long flags;
287
288 raw_spin_lock_irqsave(&sem->wait_lock, flags);
289
290 sem->activity = 1;
291 if (!list_empty(&sem->wait_list))
292 sem = __rwsem_do_wake(sem, 0);
293
294 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
295}
296
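Editor's note: the spinlock-based rwsem above encodes the whole lock state in sem->activity: zero means unlocked, a positive value counts the active readers, and -1 marks an active writer (downgrade sets it straight to 1). A small illustrative helper, not part of the kernel source:

/* Illustration only: interpret the sem->activity convention used above. */
static const char *example_rwsem_spinlock_state(long activity)
{
	if (activity == 0)
		return "unlocked";
	if (activity > 0)
		return "read-locked, activity == number of active readers";
	return "write-locked (activity == -1)";
}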
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
new file mode 100644
index 000000000000..19c5fa95e0b4
--- /dev/null
+++ b/kernel/locking/rwsem-xadd.c
@@ -0,0 +1,293 @@
1/* rwsem-xadd.c: R/W semaphores: contention handling functions
2 *
3 * Written by David Howells (dhowells@redhat.com).
4 * Derived from arch/i386/kernel/semaphore.c
5 *
6 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
7 * and Michel Lespinasse <walken@google.com>
8 */
9#include <linux/rwsem.h>
10#include <linux/sched.h>
11#include <linux/init.h>
12#include <linux/export.h>
13
14/*
15 * Initialize an rwsem:
16 */
17void __init_rwsem(struct rw_semaphore *sem, const char *name,
18 struct lock_class_key *key)
19{
20#ifdef CONFIG_DEBUG_LOCK_ALLOC
21 /*
22 * Make sure we are not reinitializing a held semaphore:
23 */
24 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
25 lockdep_init_map(&sem->dep_map, name, key, 0);
26#endif
27 sem->count = RWSEM_UNLOCKED_VALUE;
28 raw_spin_lock_init(&sem->wait_lock);
29 INIT_LIST_HEAD(&sem->wait_list);
30}
31
32EXPORT_SYMBOL(__init_rwsem);
33
34enum rwsem_waiter_type {
35 RWSEM_WAITING_FOR_WRITE,
36 RWSEM_WAITING_FOR_READ
37};
38
39struct rwsem_waiter {
40 struct list_head list;
41 struct task_struct *task;
42 enum rwsem_waiter_type type;
43};
44
45enum rwsem_wake_type {
46 RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
47 RWSEM_WAKE_READERS, /* Wake readers only */
48 RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
49};
50
51/*
52 * handle the lock release when there are processes blocked on it that can now run
53 * - if we come here from up_xxxx(), then:
54 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
55 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
56 * - there must be someone on the queue
57 * - the spinlock must be held by the caller
58 * - woken process blocks are discarded from the list after having task zeroed
59 * - writers are only woken if downgrading is false
60 */
61static struct rw_semaphore *
62__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
63{
64 struct rwsem_waiter *waiter;
65 struct task_struct *tsk;
66 struct list_head *next;
67 long oldcount, woken, loop, adjustment;
68
69 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
70 if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
71 if (wake_type == RWSEM_WAKE_ANY)
72 /* Wake writer at the front of the queue, but do not
73 * grant it the lock yet as we want other writers
74 * to be able to steal it. Readers, on the other hand,
75 * will block as they will notice the queued writer.
76 */
77 wake_up_process(waiter->task);
78 goto out;
79 }
80
81 /* Writers might steal the lock before we grant it to the next reader.
82 * We prefer to do the first reader grant before counting readers
83 * so we can bail out early if a writer stole the lock.
84 */
85 adjustment = 0;
86 if (wake_type != RWSEM_WAKE_READ_OWNED) {
87 adjustment = RWSEM_ACTIVE_READ_BIAS;
88 try_reader_grant:
89 oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
90 if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
91 /* A writer stole the lock. Undo our reader grant. */
92 if (rwsem_atomic_update(-adjustment, sem) &
93 RWSEM_ACTIVE_MASK)
94 goto out;
95 /* Last active locker left. Retry waking readers. */
96 goto try_reader_grant;
97 }
98 }
99
100 /* Grant an infinite number of read locks to the readers at the front
101 * of the queue. Note we increment the 'active part' of the count by
102 * the number of readers before waking any processes up.
103 */
104 woken = 0;
105 do {
106 woken++;
107
108 if (waiter->list.next == &sem->wait_list)
109 break;
110
111 waiter = list_entry(waiter->list.next,
112 struct rwsem_waiter, list);
113
114 } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
115
116 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
117 if (waiter->type != RWSEM_WAITING_FOR_WRITE)
118 /* hit end of list above */
119 adjustment -= RWSEM_WAITING_BIAS;
120
121 if (adjustment)
122 rwsem_atomic_add(adjustment, sem);
123
124 next = sem->wait_list.next;
125 loop = woken;
126 do {
127 waiter = list_entry(next, struct rwsem_waiter, list);
128 next = waiter->list.next;
129 tsk = waiter->task;
130 smp_mb();
131 waiter->task = NULL;
132 wake_up_process(tsk);
133 put_task_struct(tsk);
134 } while (--loop);
135
136 sem->wait_list.next = next;
137 next->prev = &sem->wait_list;
138
139 out:
140 return sem;
141}
142
143/*
144 * wait for the read lock to be granted
145 */
146struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
147{
148 long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
149 struct rwsem_waiter waiter;
150 struct task_struct *tsk = current;
151
152 /* set up my own style of waitqueue */
153 waiter.task = tsk;
154 waiter.type = RWSEM_WAITING_FOR_READ;
155 get_task_struct(tsk);
156
157 raw_spin_lock_irq(&sem->wait_lock);
158 if (list_empty(&sem->wait_list))
159 adjustment += RWSEM_WAITING_BIAS;
160 list_add_tail(&waiter.list, &sem->wait_list);
161
162 /* we're now waiting on the lock, but no longer actively locking */
163 count = rwsem_atomic_update(adjustment, sem);
164
165 /* If there are no active locks, wake the front queued process(es).
166 *
167 * If there are no writers and we are first in the queue,
168 * wake our own waiter to join the existing active readers !
169	 * wake our own waiter to join the existing active readers!
170 if (count == RWSEM_WAITING_BIAS ||
171 (count > RWSEM_WAITING_BIAS &&
172 adjustment != -RWSEM_ACTIVE_READ_BIAS))
173 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
174
175 raw_spin_unlock_irq(&sem->wait_lock);
176
177 /* wait to be given the lock */
178 while (true) {
179 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
180 if (!waiter.task)
181 break;
182 schedule();
183 }
184
185 tsk->state = TASK_RUNNING;
186
187 return sem;
188}
189
190/*
191 * wait until we successfully acquire the write lock
192 */
193struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
194{
195 long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
196 struct rwsem_waiter waiter;
197 struct task_struct *tsk = current;
198
199 /* set up my own style of waitqueue */
200 waiter.task = tsk;
201 waiter.type = RWSEM_WAITING_FOR_WRITE;
202
203 raw_spin_lock_irq(&sem->wait_lock);
204 if (list_empty(&sem->wait_list))
205 adjustment += RWSEM_WAITING_BIAS;
206 list_add_tail(&waiter.list, &sem->wait_list);
207
208 /* we're now waiting on the lock, but no longer actively locking */
209 count = rwsem_atomic_update(adjustment, sem);
210
211 /* If there were already threads queued before us and there are no
212 * active writers, the lock must be read owned; so we try to wake
213	 * any readers that were queued ahead of us. */
214 if (count > RWSEM_WAITING_BIAS &&
215 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
216 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
217
218 /* wait until we successfully acquire the lock */
219 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
220 while (true) {
221 if (!(count & RWSEM_ACTIVE_MASK)) {
222 /* Try acquiring the write lock. */
223 count = RWSEM_ACTIVE_WRITE_BIAS;
224 if (!list_is_singular(&sem->wait_list))
225 count += RWSEM_WAITING_BIAS;
226
227 if (sem->count == RWSEM_WAITING_BIAS &&
228 cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
229 RWSEM_WAITING_BIAS)
230 break;
231 }
232
233 raw_spin_unlock_irq(&sem->wait_lock);
234
235 /* Block until there are no active lockers. */
236 do {
237 schedule();
238 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
239 } while ((count = sem->count) & RWSEM_ACTIVE_MASK);
240
241 raw_spin_lock_irq(&sem->wait_lock);
242 }
243
244 list_del(&waiter.list);
245 raw_spin_unlock_irq(&sem->wait_lock);
246 tsk->state = TASK_RUNNING;
247
248 return sem;
249}
250
251/*
252 * handle waking up a waiter on the semaphore
253 * - up_read/up_write has decremented the active part of count if we come here
254 */
255struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
256{
257 unsigned long flags;
258
259 raw_spin_lock_irqsave(&sem->wait_lock, flags);
260
261 /* do nothing if list empty */
262 if (!list_empty(&sem->wait_list))
263 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
264
265 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
266
267 return sem;
268}
269
270/*
271 * downgrade a write lock into a read lock
272 * - caller incremented waiting part of count and discovered it still negative
273 * - just wake up any readers at the front of the queue
274 */
275struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
276{
277 unsigned long flags;
278
279 raw_spin_lock_irqsave(&sem->wait_lock, flags);
280
281 /* do nothing if list empty */
282 if (!list_empty(&sem->wait_list))
283 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
284
285 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
286
287 return sem;
288}
289
290EXPORT_SYMBOL(rwsem_down_read_failed);
291EXPORT_SYMBOL(rwsem_down_write_failed);
292EXPORT_SYMBOL(rwsem_wake);
293EXPORT_SYMBOL(rwsem_downgrade_wake);
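Editor's note: the xadd implementation above packs both the active and waiting state into sem->count using arch-defined bias constants. A hedged worked example, assuming the common 32-bit asm-generic values (RWSEM_ACTIVE_BIAS = 0x00000001, RWSEM_ACTIVE_MASK = 0x0000ffff, RWSEM_WAITING_BIAS = -0x00010000, RWSEM_ACTIVE_WRITE_BIAS = RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS; 64-bit configurations use wider equivalents):

/*
 * Illustration only: one possible count sequence.
 *
 *   unlocked                   count = 0x00000000
 *   down_read()                count += ACTIVE_READ_BIAS       -> 0x00000001
 *   down_write() (contended)   fast path adds WRITE_BIAS, sees the active
 *                              reader, and rwsem_down_write_failed() queues
 *                              the writer; with the waiter bias in place
 *                              count = RWSEM_WAITING_BIAS + 1
 *   up_read()                  count -= ACTIVE_READ_BIAS -> RWSEM_WAITING_BIAS;
 *                              the active part is now zero, so rwsem_wake()
 *                              hands the lock to the queued writer.
 */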
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
new file mode 100644
index 000000000000..cfff1435bdfb
--- /dev/null
+++ b/kernel/locking/rwsem.c
@@ -0,0 +1,157 @@
1/* kernel/locking/rwsem.c: R/W semaphores, public implementation
2 *
3 * Written by David Howells (dhowells@redhat.com).
4 * Derived from asm-i386/semaphore.h
5 */
6
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/sched.h>
10#include <linux/export.h>
11#include <linux/rwsem.h>
12
13#include <linux/atomic.h>
14
15/*
16 * lock for reading
17 */
18void __sched down_read(struct rw_semaphore *sem)
19{
20 might_sleep();
21 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
22
23 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
24}
25
26EXPORT_SYMBOL(down_read);
27
28/*
29 * trylock for reading -- returns 1 if successful, 0 if contention
30 */
31int down_read_trylock(struct rw_semaphore *sem)
32{
33 int ret = __down_read_trylock(sem);
34
35 if (ret == 1)
36 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
37 return ret;
38}
39
40EXPORT_SYMBOL(down_read_trylock);
41
42/*
43 * lock for writing
44 */
45void __sched down_write(struct rw_semaphore *sem)
46{
47 might_sleep();
48 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
49
50 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
51}
52
53EXPORT_SYMBOL(down_write);
54
55/*
56 * trylock for writing -- returns 1 if successful, 0 if contention
57 */
58int down_write_trylock(struct rw_semaphore *sem)
59{
60 int ret = __down_write_trylock(sem);
61
62 if (ret == 1)
63 rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
64 return ret;
65}
66
67EXPORT_SYMBOL(down_write_trylock);
68
69/*
70 * release a read lock
71 */
72void up_read(struct rw_semaphore *sem)
73{
74 rwsem_release(&sem->dep_map, 1, _RET_IP_);
75
76 __up_read(sem);
77}
78
79EXPORT_SYMBOL(up_read);
80
81/*
82 * release a write lock
83 */
84void up_write(struct rw_semaphore *sem)
85{
86 rwsem_release(&sem->dep_map, 1, _RET_IP_);
87
88 __up_write(sem);
89}
90
91EXPORT_SYMBOL(up_write);
92
93/*
94 * downgrade write lock to read lock
95 */
96void downgrade_write(struct rw_semaphore *sem)
97{
98 /*
99 * lockdep: a downgraded write will live on as a write
100 * dependency.
101 */
102 __downgrade_write(sem);
103}
104
105EXPORT_SYMBOL(downgrade_write);
106
107#ifdef CONFIG_DEBUG_LOCK_ALLOC
108
109void down_read_nested(struct rw_semaphore *sem, int subclass)
110{
111 might_sleep();
112 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
113
114 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
115}
116
117EXPORT_SYMBOL(down_read_nested);
118
119void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
120{
121 might_sleep();
122 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
123
124 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
125}
126
127EXPORT_SYMBOL(_down_write_nest_lock);
128
129void down_read_non_owner(struct rw_semaphore *sem)
130{
131 might_sleep();
132
133 __down_read(sem);
134}
135
136EXPORT_SYMBOL(down_read_non_owner);
137
138void down_write_nested(struct rw_semaphore *sem, int subclass)
139{
140 might_sleep();
141 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
142
143 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
144}
145
146EXPORT_SYMBOL(down_write_nested);
147
148void up_read_non_owner(struct rw_semaphore *sem)
149{
150 __up_read(sem);
151}
152
153EXPORT_SYMBOL(up_read_non_owner);
154
155#endif
156
157
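Editor's note: a minimal usage sketch of the rwsem API exported above, assuming the DECLARE_RWSEM() static initializer from include/linux/rwsem.h (the example names are hypothetical):

#include <linux/rwsem.h>

static DECLARE_RWSEM(example_rwsem);

static void example_reader(void)
{
	down_read(&example_rwsem);		/* shared: concurrent readers allowed */
	/* ... read shared state ... */
	up_read(&example_rwsem);
}

static void example_writer(void)
{
	down_write(&example_rwsem);		/* exclusive */
	/* ... update shared state ... */
	downgrade_write(&example_rwsem);	/* keep reading without blocking new readers */
	/* ... read the state just written ... */
	up_read(&example_rwsem);
}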
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
new file mode 100644
index 000000000000..6815171a4fff
--- /dev/null
+++ b/kernel/locking/semaphore.c
@@ -0,0 +1,263 @@
1/*
2 * Copyright (c) 2008 Intel Corporation
3 * Author: Matthew Wilcox <willy@linux.intel.com>
4 *
5 * Distributed under the terms of the GNU GPL, version 2
6 *
7 * This file implements counting semaphores.
8 * A counting semaphore may be acquired 'n' times before sleeping.
9 * See mutex.c for single-acquisition sleeping locks which enforce
10 * rules which allow code to be debugged more easily.
11 */
12
13/*
14 * Some notes on the implementation:
15 *
16 * The spinlock controls access to the other members of the semaphore.
17 * down_trylock() and up() can be called from interrupt context, so we
18 * have to disable interrupts when taking the lock. It turns out various
19 * parts of the kernel expect to be able to use down() on a semaphore in
20 * interrupt context when they know it will succeed, so we have to use
21 * irqsave variants for down(), down_interruptible() and down_killable()
22 * too.
23 *
24 * The ->count variable represents how many more tasks can acquire this
25 * semaphore. If it's zero, there may be tasks waiting on the wait_list.
26 */
27
28#include <linux/compiler.h>
29#include <linux/kernel.h>
30#include <linux/export.h>
31#include <linux/sched.h>
32#include <linux/semaphore.h>
33#include <linux/spinlock.h>
34#include <linux/ftrace.h>
35
36static noinline void __down(struct semaphore *sem);
37static noinline int __down_interruptible(struct semaphore *sem);
38static noinline int __down_killable(struct semaphore *sem);
39static noinline int __down_timeout(struct semaphore *sem, long jiffies);
40static noinline void __up(struct semaphore *sem);
41
42/**
43 * down - acquire the semaphore
44 * @sem: the semaphore to be acquired
45 *
46 * Acquires the semaphore. If no more tasks are allowed to acquire the
47 * semaphore, calling this function will put the task to sleep until the
48 * semaphore is released.
49 *
50 * Use of this function is deprecated, please use down_interruptible() or
51 * down_killable() instead.
52 */
53void down(struct semaphore *sem)
54{
55 unsigned long flags;
56
57 raw_spin_lock_irqsave(&sem->lock, flags);
58 if (likely(sem->count > 0))
59 sem->count--;
60 else
61 __down(sem);
62 raw_spin_unlock_irqrestore(&sem->lock, flags);
63}
64EXPORT_SYMBOL(down);
65
66/**
67 * down_interruptible - acquire the semaphore unless interrupted
68 * @sem: the semaphore to be acquired
69 *
70 * Attempts to acquire the semaphore. If no more tasks are allowed to
71 * acquire the semaphore, calling this function will put the task to sleep.
72 * If the sleep is interrupted by a signal, this function will return -EINTR.
73 * If the semaphore is successfully acquired, this function returns 0.
74 */
75int down_interruptible(struct semaphore *sem)
76{
77 unsigned long flags;
78 int result = 0;
79
80 raw_spin_lock_irqsave(&sem->lock, flags);
81 if (likely(sem->count > 0))
82 sem->count--;
83 else
84 result = __down_interruptible(sem);
85 raw_spin_unlock_irqrestore(&sem->lock, flags);
86
87 return result;
88}
89EXPORT_SYMBOL(down_interruptible);
90
91/**
92 * down_killable - acquire the semaphore unless killed
93 * @sem: the semaphore to be acquired
94 *
95 * Attempts to acquire the semaphore. If no more tasks are allowed to
96 * acquire the semaphore, calling this function will put the task to sleep.
97 * If the sleep is interrupted by a fatal signal, this function will return
98 * -EINTR. If the semaphore is successfully acquired, this function returns
99 * 0.
100 */
101int down_killable(struct semaphore *sem)
102{
103 unsigned long flags;
104 int result = 0;
105
106 raw_spin_lock_irqsave(&sem->lock, flags);
107 if (likely(sem->count > 0))
108 sem->count--;
109 else
110 result = __down_killable(sem);
111 raw_spin_unlock_irqrestore(&sem->lock, flags);
112
113 return result;
114}
115EXPORT_SYMBOL(down_killable);
116
117/**
118 * down_trylock - try to acquire the semaphore, without waiting
119 * @sem: the semaphore to be acquired
120 *
121 * Try to acquire the semaphore atomically. Returns 0 if the semaphore has
122 * been acquired successfully or 1 if it cannot be acquired.
123 *
124 * NOTE: This return value is inverted from both spin_trylock and
125 * mutex_trylock! Be careful about this when converting code.
126 *
127 * Unlike mutex_trylock, this function can be used from interrupt context,
128 * and the semaphore can be released by any task or interrupt.
129 */
130int down_trylock(struct semaphore *sem)
131{
132 unsigned long flags;
133 int count;
134
135 raw_spin_lock_irqsave(&sem->lock, flags);
136 count = sem->count - 1;
137 if (likely(count >= 0))
138 sem->count = count;
139 raw_spin_unlock_irqrestore(&sem->lock, flags);
140
141 return (count < 0);
142}
143EXPORT_SYMBOL(down_trylock);
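Editor's note: because the return convention above is inverted relative to spin_trylock() and mutex_trylock(), a call site must treat non-zero as failure. A hypothetical caller, for illustration only:

static int example_try_get(struct semaphore *sem)
{
	if (down_trylock(sem))
		return -EBUSY;	/* non-zero: the semaphore was NOT acquired */
	/* acquired; the caller must pair this with up(sem) */
	return 0;
}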
144
145/**
146 * down_timeout - acquire the semaphore within a specified time
147 * @sem: the semaphore to be acquired
148 * @jiffies: how long to wait before failing
149 *
150 * Attempts to acquire the semaphore. If no more tasks are allowed to
151 * acquire the semaphore, calling this function will put the task to sleep.
152 * If the semaphore is not released within the specified number of jiffies,
153 * this function returns -ETIME. It returns 0 if the semaphore was acquired.
154 */
155int down_timeout(struct semaphore *sem, long jiffies)
156{
157 unsigned long flags;
158 int result = 0;
159
160 raw_spin_lock_irqsave(&sem->lock, flags);
161 if (likely(sem->count > 0))
162 sem->count--;
163 else
164 result = __down_timeout(sem, jiffies);
165 raw_spin_unlock_irqrestore(&sem->lock, flags);
166
167 return result;
168}
169EXPORT_SYMBOL(down_timeout);
170
171/**
172 * up - release the semaphore
173 * @sem: the semaphore to release
174 *
175 * Release the semaphore. Unlike mutexes, up() may be called from any
176 * context and even by tasks which have never called down().
177 */
178void up(struct semaphore *sem)
179{
180 unsigned long flags;
181
182 raw_spin_lock_irqsave(&sem->lock, flags);
183 if (likely(list_empty(&sem->wait_list)))
184 sem->count++;
185 else
186 __up(sem);
187 raw_spin_unlock_irqrestore(&sem->lock, flags);
188}
189EXPORT_SYMBOL(up);
190
191/* Functions for the contended case */
192
193struct semaphore_waiter {
194 struct list_head list;
195 struct task_struct *task;
196 bool up;
197};
198
199/*
200 * Because this function is inlined, the 'state' parameter will be
201 * constant, and thus optimised away by the compiler. Likewise the
202 * 'timeout' parameter for the cases without timeouts.
203 */
204static inline int __sched __down_common(struct semaphore *sem, long state,
205 long timeout)
206{
207 struct task_struct *task = current;
208 struct semaphore_waiter waiter;
209
210 list_add_tail(&waiter.list, &sem->wait_list);
211 waiter.task = task;
212 waiter.up = false;
213
214 for (;;) {
215 if (signal_pending_state(state, task))
216 goto interrupted;
217 if (unlikely(timeout <= 0))
218 goto timed_out;
219 __set_task_state(task, state);
220 raw_spin_unlock_irq(&sem->lock);
221 timeout = schedule_timeout(timeout);
222 raw_spin_lock_irq(&sem->lock);
223 if (waiter.up)
224 return 0;
225 }
226
227 timed_out:
228 list_del(&waiter.list);
229 return -ETIME;
230
231 interrupted:
232 list_del(&waiter.list);
233 return -EINTR;
234}
235
236static noinline void __sched __down(struct semaphore *sem)
237{
238 __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
239}
240
241static noinline int __sched __down_interruptible(struct semaphore *sem)
242{
243 return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
244}
245
246static noinline int __sched __down_killable(struct semaphore *sem)
247{
248 return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT);
249}
250
251static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
252{
253 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
254}
255
256static noinline void __sched __up(struct semaphore *sem)
257{
258 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
259 struct semaphore_waiter, list);
260 list_del(&waiter->list);
261 waiter->up = true;
262 wake_up_process(waiter->task);
263}
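Editor's note: a short usage sketch of the public semaphore API above, assuming sema_init() from include/linux/semaphore.h (the example names are hypothetical):

#include <linux/semaphore.h>

static struct semaphore example_sem;

static void example_setup(void)
{
	sema_init(&example_sem, 1);	/* initial count of 1, binary-semaphore style */
}

static int example_use(void)
{
	if (down_interruptible(&example_sem))
		return -EINTR;		/* sleep was interrupted by a signal */
	/* ... */
	up(&example_sem);		/* legal from any context */
	return 0;
}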
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
new file mode 100644
index 000000000000..4b082b5cac9e
--- /dev/null
+++ b/kernel/locking/spinlock.c
@@ -0,0 +1,399 @@
1/*
2 * Copyright (2004) Linus Torvalds
3 *
4 * Author: Zwane Mwaikambo <zwane@fsmlabs.com>
5 *
6 * Copyright (2004, 2005) Ingo Molnar
7 *
8 * This file contains the spinlock/rwlock implementations for the
9 * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them)
10 *
11 * Note that some architectures have special knowledge about the
12 * stack frames of these functions in their profile_pc. If you
13 * change anything significant here that could change the stack
14 * frame, contact the architecture maintainers.
15 */
16
17#include <linux/linkage.h>
18#include <linux/preempt.h>
19#include <linux/spinlock.h>
20#include <linux/interrupt.h>
21#include <linux/debug_locks.h>
22#include <linux/export.h>
23
24/*
25 * If lockdep is enabled then we use the non-preemption spin-ops
26 * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
27 * not re-enabled during lock-acquire (which the preempt-spin-ops do):
28 */
29#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
30/*
31 * The __lock_function inlines are taken from
32 * include/linux/spinlock_api_smp.h
33 */
34#else
35#define raw_read_can_lock(l) read_can_lock(l)
36#define raw_write_can_lock(l) write_can_lock(l)
37
38/*
39 * Some architectures can relax in favour of the CPU owning the lock.
40 */
41#ifndef arch_read_relax
42# define arch_read_relax(l) cpu_relax()
43#endif
44#ifndef arch_write_relax
45# define arch_write_relax(l) cpu_relax()
46#endif
47#ifndef arch_spin_relax
48# define arch_spin_relax(l) cpu_relax()
49#endif
50
51/*
52 * We build the __lock_function inlines here. They are too large for
53 * inlining all over the place, but here there is only one user per function,
54 * which embeds them into the calling _lock_function below.
55 *
56 * This could be a long-held lock. We both prepare to spin for a long
57 * time (making _this_ CPU preemptable if possible), and we also signal
58 * towards that other CPU that it should break the lock ASAP.
59 */
60#define BUILD_LOCK_OPS(op, locktype) \
61void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
62{ \
63 for (;;) { \
64 preempt_disable(); \
65 if (likely(do_raw_##op##_trylock(lock))) \
66 break; \
67 preempt_enable(); \
68 \
69 if (!(lock)->break_lock) \
70 (lock)->break_lock = 1; \
71 while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
72 arch_##op##_relax(&lock->raw_lock); \
73 } \
74 (lock)->break_lock = 0; \
75} \
76 \
77unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
78{ \
79 unsigned long flags; \
80 \
81 for (;;) { \
82 preempt_disable(); \
83 local_irq_save(flags); \
84 if (likely(do_raw_##op##_trylock(lock))) \
85 break; \
86 local_irq_restore(flags); \
87 preempt_enable(); \
88 \
89 if (!(lock)->break_lock) \
90 (lock)->break_lock = 1; \
91 while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
92 arch_##op##_relax(&lock->raw_lock); \
93 } \
94 (lock)->break_lock = 0; \
95 return flags; \
96} \
97 \
98void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \
99{ \
100 _raw_##op##_lock_irqsave(lock); \
101} \
102 \
103void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
104{ \
105 unsigned long flags; \
106 \
107 /* */ \
108 /* Careful: we must exclude softirqs too, hence the */ \
109 /* irq-disabling. We use the generic preemption-aware */ \
110 /* function: */ \
111 /**/ \
112 flags = _raw_##op##_lock_irqsave(lock); \
113 local_bh_disable(); \
114 local_irq_restore(flags); \
115} \
116
117/*
118 * Build preemption-friendly versions of the following
119 * lock-spinning functions:
120 *
121 * __[spin|read|write]_lock()
122 * __[spin|read|write]_lock_irq()
123 * __[spin|read|write]_lock_irqsave()
124 * __[spin|read|write]_lock_bh()
125 */
126BUILD_LOCK_OPS(spin, raw_spinlock);
127BUILD_LOCK_OPS(read, rwlock);
128BUILD_LOCK_OPS(write, rwlock);
129
130#endif
131
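Editor's note: for reference, BUILD_LOCK_OPS(spin, raw_spinlock) above expands (roughly, whitespace reflowed) to the following plain lock operation; the read and write variants differ only in the substituted op and lock type, and the macro builds the _irqsave/_irq/_bh flavours in the same way:

void __lockfunc __raw_spin_lock(raw_spinlock_t *lock)
{
	for (;;) {
		preempt_disable();
		if (likely(do_raw_spin_trylock(lock)))
			break;
		preempt_enable();

		if (!(lock)->break_lock)
			(lock)->break_lock = 1;
		while (!raw_spin_can_lock(lock) && (lock)->break_lock)
			arch_spin_relax(&lock->raw_lock);
	}
	(lock)->break_lock = 0;
}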
132#ifndef CONFIG_INLINE_SPIN_TRYLOCK
133int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
134{
135 return __raw_spin_trylock(lock);
136}
137EXPORT_SYMBOL(_raw_spin_trylock);
138#endif
139
140#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
141int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
142{
143 return __raw_spin_trylock_bh(lock);
144}
145EXPORT_SYMBOL(_raw_spin_trylock_bh);
146#endif
147
148#ifndef CONFIG_INLINE_SPIN_LOCK
149void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
150{
151 __raw_spin_lock(lock);
152}
153EXPORT_SYMBOL(_raw_spin_lock);
154#endif
155
156#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
157unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
158{
159 return __raw_spin_lock_irqsave(lock);
160}
161EXPORT_SYMBOL(_raw_spin_lock_irqsave);
162#endif
163
164#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
165void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
166{
167 __raw_spin_lock_irq(lock);
168}
169EXPORT_SYMBOL(_raw_spin_lock_irq);
170#endif
171
172#ifndef CONFIG_INLINE_SPIN_LOCK_BH
173void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
174{
175 __raw_spin_lock_bh(lock);
176}
177EXPORT_SYMBOL(_raw_spin_lock_bh);
178#endif
179
180#ifdef CONFIG_UNINLINE_SPIN_UNLOCK
181void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
182{
183 __raw_spin_unlock(lock);
184}
185EXPORT_SYMBOL(_raw_spin_unlock);
186#endif
187
188#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
189void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
190{
191 __raw_spin_unlock_irqrestore(lock, flags);
192}
193EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
194#endif
195
196#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
197void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
198{
199 __raw_spin_unlock_irq(lock);
200}
201EXPORT_SYMBOL(_raw_spin_unlock_irq);
202#endif
203
204#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
205void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
206{
207 __raw_spin_unlock_bh(lock);
208}
209EXPORT_SYMBOL(_raw_spin_unlock_bh);
210#endif
211
212#ifndef CONFIG_INLINE_READ_TRYLOCK
213int __lockfunc _raw_read_trylock(rwlock_t *lock)
214{
215 return __raw_read_trylock(lock);
216}
217EXPORT_SYMBOL(_raw_read_trylock);
218#endif
219
220#ifndef CONFIG_INLINE_READ_LOCK
221void __lockfunc _raw_read_lock(rwlock_t *lock)
222{
223 __raw_read_lock(lock);
224}
225EXPORT_SYMBOL(_raw_read_lock);
226#endif
227
228#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
229unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
230{
231 return __raw_read_lock_irqsave(lock);
232}
233EXPORT_SYMBOL(_raw_read_lock_irqsave);
234#endif
235
236#ifndef CONFIG_INLINE_READ_LOCK_IRQ
237void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
238{
239 __raw_read_lock_irq(lock);
240}
241EXPORT_SYMBOL(_raw_read_lock_irq);
242#endif
243
244#ifndef CONFIG_INLINE_READ_LOCK_BH
245void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
246{
247 __raw_read_lock_bh(lock);
248}
249EXPORT_SYMBOL(_raw_read_lock_bh);
250#endif
251
252#ifndef CONFIG_INLINE_READ_UNLOCK
253void __lockfunc _raw_read_unlock(rwlock_t *lock)
254{
255 __raw_read_unlock(lock);
256}
257EXPORT_SYMBOL(_raw_read_unlock);
258#endif
259
260#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
261void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
262{
263 __raw_read_unlock_irqrestore(lock, flags);
264}
265EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
266#endif
267
268#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
269void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
270{
271 __raw_read_unlock_irq(lock);
272}
273EXPORT_SYMBOL(_raw_read_unlock_irq);
274#endif
275
276#ifndef CONFIG_INLINE_READ_UNLOCK_BH
277void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
278{
279 __raw_read_unlock_bh(lock);
280}
281EXPORT_SYMBOL(_raw_read_unlock_bh);
282#endif
283
284#ifndef CONFIG_INLINE_WRITE_TRYLOCK
285int __lockfunc _raw_write_trylock(rwlock_t *lock)
286{
287 return __raw_write_trylock(lock);
288}
289EXPORT_SYMBOL(_raw_write_trylock);
290#endif
291
292#ifndef CONFIG_INLINE_WRITE_LOCK
293void __lockfunc _raw_write_lock(rwlock_t *lock)
294{
295 __raw_write_lock(lock);
296}
297EXPORT_SYMBOL(_raw_write_lock);
298#endif
299
300#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
301unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
302{
303 return __raw_write_lock_irqsave(lock);
304}
305EXPORT_SYMBOL(_raw_write_lock_irqsave);
306#endif
307
308#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
309void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
310{
311 __raw_write_lock_irq(lock);
312}
313EXPORT_SYMBOL(_raw_write_lock_irq);
314#endif
315
316#ifndef CONFIG_INLINE_WRITE_LOCK_BH
317void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
318{
319 __raw_write_lock_bh(lock);
320}
321EXPORT_SYMBOL(_raw_write_lock_bh);
322#endif
323
324#ifndef CONFIG_INLINE_WRITE_UNLOCK
325void __lockfunc _raw_write_unlock(rwlock_t *lock)
326{
327 __raw_write_unlock(lock);
328}
329EXPORT_SYMBOL(_raw_write_unlock);
330#endif
331
332#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
333void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
334{
335 __raw_write_unlock_irqrestore(lock, flags);
336}
337EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
338#endif
339
340#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
341void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
342{
343 __raw_write_unlock_irq(lock);
344}
345EXPORT_SYMBOL(_raw_write_unlock_irq);
346#endif
347
348#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
349void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
350{
351 __raw_write_unlock_bh(lock);
352}
353EXPORT_SYMBOL(_raw_write_unlock_bh);
354#endif
355
356#ifdef CONFIG_DEBUG_LOCK_ALLOC
357
358void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
359{
360 preempt_disable();
361 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
362 LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
363}
364EXPORT_SYMBOL(_raw_spin_lock_nested);
365
366unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
367 int subclass)
368{
369 unsigned long flags;
370
371 local_irq_save(flags);
372 preempt_disable();
373 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
374 LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock,
375 do_raw_spin_lock_flags, &flags);
376 return flags;
377}
378EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested);
379
380void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock,
381 struct lockdep_map *nest_lock)
382{
383 preempt_disable();
384 spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
385 LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
386}
387EXPORT_SYMBOL(_raw_spin_lock_nest_lock);
388
389#endif
390
391notrace int in_lock_functions(unsigned long addr)
392{
393 /* Linker adds these: start and end of __lockfunc functions */
394 extern char __lock_text_start[], __lock_text_end[];
395
396 return addr >= (unsigned long)__lock_text_start
397 && addr < (unsigned long)__lock_text_end;
398}
399EXPORT_SYMBOL(in_lock_functions);
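Editor's note: callers normally reach the uninlined entry points above through the spin_lock_*() wrappers in include/linux/spinlock.h. A minimal usage sketch (the example names are hypothetical):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

static void example(void)
{
	unsigned long flags;

	spin_lock_irqsave(&example_lock, flags);	/* also disables local interrupts */
	/* ... critical section safe against IRQ context ... */
	spin_unlock_irqrestore(&example_lock, flags);
}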
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
new file mode 100644
index 000000000000..0374a596cffa
--- /dev/null
+++ b/kernel/locking/spinlock_debug.c
@@ -0,0 +1,302 @@
1/*
2 * Copyright 2005, Red Hat, Inc., Ingo Molnar
3 * Released under the General Public License (GPL).
4 *
5 * This file contains the spinlock/rwlock implementations for
6 * DEBUG_SPINLOCK.
7 */
8
9#include <linux/spinlock.h>
10#include <linux/nmi.h>
11#include <linux/interrupt.h>
12#include <linux/debug_locks.h>
13#include <linux/delay.h>
14#include <linux/export.h>
15
16void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
17 struct lock_class_key *key)
18{
19#ifdef CONFIG_DEBUG_LOCK_ALLOC
20 /*
21 * Make sure we are not reinitializing a held lock:
22 */
23 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
24 lockdep_init_map(&lock->dep_map, name, key, 0);
25#endif
26 lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
27 lock->magic = SPINLOCK_MAGIC;
28 lock->owner = SPINLOCK_OWNER_INIT;
29 lock->owner_cpu = -1;
30}
31
32EXPORT_SYMBOL(__raw_spin_lock_init);
33
34void __rwlock_init(rwlock_t *lock, const char *name,
35 struct lock_class_key *key)
36{
37#ifdef CONFIG_DEBUG_LOCK_ALLOC
38 /*
39 * Make sure we are not reinitializing a held lock:
40 */
41 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
42 lockdep_init_map(&lock->dep_map, name, key, 0);
43#endif
44 lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
45 lock->magic = RWLOCK_MAGIC;
46 lock->owner = SPINLOCK_OWNER_INIT;
47 lock->owner_cpu = -1;
48}
49
50EXPORT_SYMBOL(__rwlock_init);
51
52static void spin_dump(raw_spinlock_t *lock, const char *msg)
53{
54 struct task_struct *owner = NULL;
55
56 if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
57 owner = lock->owner;
58 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
59 msg, raw_smp_processor_id(),
60 current->comm, task_pid_nr(current));
61 printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
62 ".owner_cpu: %d\n",
63 lock, lock->magic,
64 owner ? owner->comm : "<none>",
65 owner ? task_pid_nr(owner) : -1,
66 lock->owner_cpu);
67 dump_stack();
68}
69
70static void spin_bug(raw_spinlock_t *lock, const char *msg)
71{
72 if (!debug_locks_off())
73 return;
74
75 spin_dump(lock, msg);
76}
77
78#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
79
80static inline void
81debug_spin_lock_before(raw_spinlock_t *lock)
82{
83 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
84 SPIN_BUG_ON(lock->owner == current, lock, "recursion");
85 SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
86 lock, "cpu recursion");
87}
88
89static inline void debug_spin_lock_after(raw_spinlock_t *lock)
90{
91 lock->owner_cpu = raw_smp_processor_id();
92 lock->owner = current;
93}
94
95static inline void debug_spin_unlock(raw_spinlock_t *lock)
96{
97 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
98 SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
99 SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
100 SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
101 lock, "wrong CPU");
102 lock->owner = SPINLOCK_OWNER_INIT;
103 lock->owner_cpu = -1;
104}
105
106static void __spin_lock_debug(raw_spinlock_t *lock)
107{
108 u64 i;
109 u64 loops = loops_per_jiffy * HZ;
110
111 for (i = 0; i < loops; i++) {
112 if (arch_spin_trylock(&lock->raw_lock))
113 return;
114 __delay(1);
115 }
116 /* lockup suspected: */
117 spin_dump(lock, "lockup suspected");
118#ifdef CONFIG_SMP
119 trigger_all_cpu_backtrace();
120#endif
121
122 /*
123 * The trylock above was causing a livelock. Give the lower level arch
124 * specific lock code a chance to acquire the lock. We have already
125 * printed a warning/backtrace at this point. The non-debug arch
126 * specific code might actually succeed in acquiring the lock. If it is
127 * not successful, the end-result is the same - there is no forward
128 * progress.
129 */
130 arch_spin_lock(&lock->raw_lock);
131}
132
133void do_raw_spin_lock(raw_spinlock_t *lock)
134{
135 debug_spin_lock_before(lock);
136 if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
137 __spin_lock_debug(lock);
138 debug_spin_lock_after(lock);
139}
140
141int do_raw_spin_trylock(raw_spinlock_t *lock)
142{
143 int ret = arch_spin_trylock(&lock->raw_lock);
144
145 if (ret)
146 debug_spin_lock_after(lock);
147#ifndef CONFIG_SMP
148 /*
149 * Must not happen on UP:
150 */
151 SPIN_BUG_ON(!ret, lock, "trylock failure on UP");
152#endif
153 return ret;
154}
155
156void do_raw_spin_unlock(raw_spinlock_t *lock)
157{
158 debug_spin_unlock(lock);
159 arch_spin_unlock(&lock->raw_lock);
160}
161
162static void rwlock_bug(rwlock_t *lock, const char *msg)
163{
164 if (!debug_locks_off())
165 return;
166
167 printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
168 msg, raw_smp_processor_id(), current->comm,
169 task_pid_nr(current), lock);
170 dump_stack();
171}
172
173#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
174
175#if 0 /* __write_lock_debug() can lock up - maybe this can too? */
176static void __read_lock_debug(rwlock_t *lock)
177{
178 u64 i;
179 u64 loops = loops_per_jiffy * HZ;
180 int print_once = 1;
181
182 for (;;) {
183 for (i = 0; i < loops; i++) {
184 if (arch_read_trylock(&lock->raw_lock))
185 return;
186 __delay(1);
187 }
188 /* lockup suspected: */
189 if (print_once) {
190 print_once = 0;
191 printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, "
192 "%s/%d, %p\n",
193 raw_smp_processor_id(), current->comm,
194 current->pid, lock);
195 dump_stack();
196 }
197 }
198}
199#endif
200
201void do_raw_read_lock(rwlock_t *lock)
202{
203 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
204 arch_read_lock(&lock->raw_lock);
205}
206
207int do_raw_read_trylock(rwlock_t *lock)
208{
209 int ret = arch_read_trylock(&lock->raw_lock);
210
211#ifndef CONFIG_SMP
212 /*
213 * Must not happen on UP:
214 */
215 RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
216#endif
217 return ret;
218}
219
220void do_raw_read_unlock(rwlock_t *lock)
221{
222 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
223 arch_read_unlock(&lock->raw_lock);
224}
225
226static inline void debug_write_lock_before(rwlock_t *lock)
227{
228 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
229 RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
230 RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
231 lock, "cpu recursion");
232}
233
234static inline void debug_write_lock_after(rwlock_t *lock)
235{
236 lock->owner_cpu = raw_smp_processor_id();
237 lock->owner = current;
238}
239
240static inline void debug_write_unlock(rwlock_t *lock)
241{
242 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
243 RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
244 RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
245 lock, "wrong CPU");
246 lock->owner = SPINLOCK_OWNER_INIT;
247 lock->owner_cpu = -1;
248}
249
250#if 0 /* This can cause lockups */
251static void __write_lock_debug(rwlock_t *lock)
252{
253 u64 i;
254 u64 loops = loops_per_jiffy * HZ;
255 int print_once = 1;
256
257 for (;;) {
258 for (i = 0; i < loops; i++) {
259 if (arch_write_trylock(&lock->raw_lock))
260 return;
261 __delay(1);
262 }
263 /* lockup suspected: */
264 if (print_once) {
265 print_once = 0;
266 printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, "
267 "%s/%d, %p\n",
268 raw_smp_processor_id(), current->comm,
269 current->pid, lock);
270 dump_stack();
271 }
272 }
273}
274#endif
275
276void do_raw_write_lock(rwlock_t *lock)
277{
278 debug_write_lock_before(lock);
279 arch_write_lock(&lock->raw_lock);
280 debug_write_lock_after(lock);
281}
282
283int do_raw_write_trylock(rwlock_t *lock)
284{
285 int ret = arch_write_trylock(&lock->raw_lock);
286
287 if (ret)
288 debug_write_lock_after(lock);
289#ifndef CONFIG_SMP
290 /*
291 * Must not happen on UP:
292 */
293 RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
294#endif
295 return ret;
296}
297
298void do_raw_write_unlock(rwlock_t *lock)
299{
300 debug_write_unlock(lock);
301 arch_write_unlock(&lock->raw_lock);
302}
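
With CONFIG_DEBUG_SPINLOCK=y the do_raw_*() implementations above are used instead of the plain inline arch wrappers, so misuse is caught at the call site: unlocking a lock that is not held trips the SPIN_BUG_ON(!raw_spin_is_locked(lock), ...) check in debug_spin_unlock(), and spin_dump() prints a "BUG: spinlock already unlocked" report with owner, CPU and a backtrace. A deliberately buggy, hypothetical test module that would trigger that check is sketched below; it is an illustration of the debug machinery, not code from this patch.

#include <linux/init.h>
#include <linux/module.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);	/* example lock, invented name */

static int __init spindbg_demo_init(void)
{
	spin_lock(&demo_lock);
	spin_unlock(&demo_lock);

	/*
	 * Bug on purpose: a second unlock without holding the lock.
	 * debug_spin_unlock() sees the lock already released and the
	 * owner no longer set to current, so it emits the spin_dump()
	 * report instead of silently corrupting the lock state.
	 */
	spin_unlock(&demo_lock);
	return 0;
}

static void __exit spindbg_demo_exit(void)
{
}

module_init(spindbg_demo_init);
module_exit(spindbg_demo_exit);
MODULE_LICENSE("GPL");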