author    | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-14 02:30:30 -0500
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-14 02:30:30 -0500
commit    | 5e30025a319910695f5010dc0fb53a23299da14d (patch)
tree      | 4292bcf78de221c7de1774ccf5ad0ac5a9315c26 /kernel/locking
parent    | 7971e23a66c94f1b9bd2d64a3e86dfbfa8c60121 (diff)
parent    | 90d3839b90fe379557dae4a44735a6af78f42885 (diff)
Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core locking changes from Ingo Molnar:
"The biggest changes:
- add lockdep support for seqcount/seqlocks structures, this
unearthed both bugs and required extra annotation.
- move the various kernel locking primitives to the new
kernel/locking/ directory"
* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
block: Use u64_stats_init() to initialize seqcounts
locking/lockdep: Mark __lockdep_count_forward_deps() as static
lockdep/proc: Fix lock-time avg computation
locking/doc: Update references to kernel/mutex.c
ipv6: Fix possible ipv6 seqlock deadlock
cpuset: Fix potential deadlock w/ set_mems_allowed
seqcount: Add lockdep functionality to seqcount/seqlock structures
net: Explicitly initialize u64_stats_sync structures for lockdep
locking: Move the percpu-rwsem code to kernel/locking/
locking: Move the lglocks code to kernel/locking/
locking: Move the rwsem code to kernel/locking/
locking: Move the rtmutex code to kernel/locking/
locking: Move the semaphore core to kernel/locking/
locking: Move the spinlock code to kernel/locking/
locking: Move the lockdep code to kernel/locking/
locking: Move the mutex code to kernel/locking/
hung_task debugging: Add tracepoint to report the hang
x86/locking/kconfig: Update paravirt spinlock Kconfig description
lockstat: Report avg wait and hold times
lockdep, x86/alternatives: Drop ancient lockdep fixup message
...
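To make the seqcount/lockdep item in the changelog above concrete, here is a minimal, hypothetical sketch of the annotation pattern this series enables (the struct and field names are invented): seqcount_init() and the new u64_stats_init() helper register a lockdep class for the sequence counter at init time, so locking misuse around the counter can be reported.

#include <linux/seqlock.h>
#include <linux/u64_stats_sync.h>

struct foo_stats {
	u64 rx_packets;
	struct u64_stats_sync syncp;	/* wraps a seqcount_t on 32-bit SMP */
	seqcount_t seq;			/* a bare seqcount used directly */
};

static void foo_stats_init(struct foo_stats *stats)
{
	/*
	 * Both initializers create a static lock_class_key behind the
	 * scenes, which is what gives lockdep something to track.
	 */
	u64_stats_init(&stats->syncp);
	seqcount_init(&stats->seq);
}

This is why several commits in the list above are plain "initialize explicitly for lockdep" conversions of existing seqcount/u64_stats users.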
Diffstat (limited to 'kernel/locking')
-rw-r--r-- | kernel/locking/Makefile            |   25
-rw-r--r-- | kernel/locking/lglock.c            |   89
-rw-r--r-- | kernel/locking/lockdep.c           | 4257
-rw-r--r-- | kernel/locking/lockdep_internals.h |  170
-rw-r--r-- | kernel/locking/lockdep_proc.c      |  683
-rw-r--r-- | kernel/locking/lockdep_states.h    |    9
-rw-r--r-- | kernel/locking/mutex-debug.c       |  110
-rw-r--r-- | kernel/locking/mutex-debug.h       |   55
-rw-r--r-- | kernel/locking/mutex.c             |  960
-rw-r--r-- | kernel/locking/mutex.h             |   48
-rw-r--r-- | kernel/locking/percpu-rwsem.c      |  165
-rw-r--r-- | kernel/locking/rtmutex-debug.c     |  187
-rw-r--r-- | kernel/locking/rtmutex-debug.h     |   33
-rw-r--r-- | kernel/locking/rtmutex-tester.c    |  420
-rw-r--r-- | kernel/locking/rtmutex.c           | 1060
-rw-r--r-- | kernel/locking/rtmutex.h           |   26
-rw-r--r-- | kernel/locking/rtmutex_common.h    |  126
-rw-r--r-- | kernel/locking/rwsem-spinlock.c    |  296
-rw-r--r-- | kernel/locking/rwsem-xadd.c        |  293
-rw-r--r-- | kernel/locking/rwsem.c             |  157
-rw-r--r-- | kernel/locking/semaphore.c         |  263
-rw-r--r-- | kernel/locking/spinlock.c          |  399
-rw-r--r-- | kernel/locking/spinlock_debug.c    |  302
23 files changed, 10133 insertions, 0 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
new file mode 100644
index 000000000000..baab8e5e7f66
--- /dev/null
+++ b/kernel/locking/Makefile
@@ -0,0 +1,25 @@
1 | |||
2 | obj-y += mutex.o semaphore.o rwsem.o lglock.o | ||
3 | |||
4 | ifdef CONFIG_FUNCTION_TRACER | ||
5 | CFLAGS_REMOVE_lockdep.o = -pg | ||
6 | CFLAGS_REMOVE_lockdep_proc.o = -pg | ||
7 | CFLAGS_REMOVE_mutex-debug.o = -pg | ||
8 | CFLAGS_REMOVE_rtmutex-debug.o = -pg | ||
9 | endif | ||
10 | |||
11 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | ||
12 | obj-$(CONFIG_LOCKDEP) += lockdep.o | ||
13 | ifeq ($(CONFIG_PROC_FS),y) | ||
14 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o | ||
15 | endif | ||
16 | obj-$(CONFIG_SMP) += spinlock.o | ||
17 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o | ||
18 | obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | ||
19 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | ||
20 | obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o | ||
21 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | ||
22 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | ||
23 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | ||
24 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | ||
25 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o | ||
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
new file mode 100644
index 000000000000..86ae2aebf004
--- /dev/null
+++ b/kernel/locking/lglock.c
@@ -0,0 +1,89 @@
1 | /* See include/linux/lglock.h for description */ | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/lglock.h> | ||
4 | #include <linux/cpu.h> | ||
5 | #include <linux/string.h> | ||
6 | |||
7 | /* | ||
8 | * Note there is no uninit, so lglocks cannot be defined in | ||
9 | * modules (but it's fine to use them from there) | ||
10 | * Could be added though, just undo lg_lock_init | ||
11 | */ | ||
12 | |||
13 | void lg_lock_init(struct lglock *lg, char *name) | ||
14 | { | ||
15 | LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); | ||
16 | } | ||
17 | EXPORT_SYMBOL(lg_lock_init); | ||
18 | |||
19 | void lg_local_lock(struct lglock *lg) | ||
20 | { | ||
21 | arch_spinlock_t *lock; | ||
22 | |||
23 | preempt_disable(); | ||
24 | lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | ||
25 | lock = this_cpu_ptr(lg->lock); | ||
26 | arch_spin_lock(lock); | ||
27 | } | ||
28 | EXPORT_SYMBOL(lg_local_lock); | ||
29 | |||
30 | void lg_local_unlock(struct lglock *lg) | ||
31 | { | ||
32 | arch_spinlock_t *lock; | ||
33 | |||
34 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | ||
35 | lock = this_cpu_ptr(lg->lock); | ||
36 | arch_spin_unlock(lock); | ||
37 | preempt_enable(); | ||
38 | } | ||
39 | EXPORT_SYMBOL(lg_local_unlock); | ||
40 | |||
41 | void lg_local_lock_cpu(struct lglock *lg, int cpu) | ||
42 | { | ||
43 | arch_spinlock_t *lock; | ||
44 | |||
45 | preempt_disable(); | ||
46 | lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | ||
47 | lock = per_cpu_ptr(lg->lock, cpu); | ||
48 | arch_spin_lock(lock); | ||
49 | } | ||
50 | EXPORT_SYMBOL(lg_local_lock_cpu); | ||
51 | |||
52 | void lg_local_unlock_cpu(struct lglock *lg, int cpu) | ||
53 | { | ||
54 | arch_spinlock_t *lock; | ||
55 | |||
56 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | ||
57 | lock = per_cpu_ptr(lg->lock, cpu); | ||
58 | arch_spin_unlock(lock); | ||
59 | preempt_enable(); | ||
60 | } | ||
61 | EXPORT_SYMBOL(lg_local_unlock_cpu); | ||
62 | |||
63 | void lg_global_lock(struct lglock *lg) | ||
64 | { | ||
65 | int i; | ||
66 | |||
67 | preempt_disable(); | ||
68 | lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | ||
69 | for_each_possible_cpu(i) { | ||
70 | arch_spinlock_t *lock; | ||
71 | lock = per_cpu_ptr(lg->lock, i); | ||
72 | arch_spin_lock(lock); | ||
73 | } | ||
74 | } | ||
75 | EXPORT_SYMBOL(lg_global_lock); | ||
76 | |||
77 | void lg_global_unlock(struct lglock *lg) | ||
78 | { | ||
79 | int i; | ||
80 | |||
81 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | ||
82 | for_each_possible_cpu(i) { | ||
83 | arch_spinlock_t *lock; | ||
84 | lock = per_cpu_ptr(lg->lock, i); | ||
85 | arch_spin_unlock(lock); | ||
86 | } | ||
87 | preempt_enable(); | ||
88 | } | ||
89 | EXPORT_SYMBOL(lg_global_unlock); | ||
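As a hedged usage sketch of the lglock API implemented above (the example_* names are invented; DEFINE_LGLOCK() comes from include/linux/lglock.h): the local operations take only the calling CPU's spinlock, while the global operations sweep every possible CPU's spinlock, so code that touches purely per-cpu state pays almost nothing and only the rare global path is expensive.

#include <linux/lglock.h>

DEFINE_LGLOCK(example_lglock);		/* one arch_spinlock_t per CPU */

static int __init example_init(void)
{
	/* Sets up the shared lockdep map for all the per-cpu spinlocks. */
	lg_lock_init(&example_lglock, "example_lglock");
	return 0;
}

static void example_local_update(void)
{
	/* Fast path: only this CPU's lock, taken with preemption disabled. */
	lg_local_lock(&example_lglock);
	/* ... modify this CPU's slice of the data ... */
	lg_local_unlock(&example_lglock);
}

static void example_global_scan(void)
{
	/* Slow path: excludes every local fast path on every CPU. */
	lg_global_lock(&example_lglock);
	/* ... walk all CPUs' data consistently ... */
	lg_global_unlock(&example_lglock);
}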
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
new file mode 100644
index 000000000000..576ba756a32d
--- /dev/null
+++ b/kernel/locking/lockdep.c
@@ -0,0 +1,4257 @@
1 | /* | ||
2 | * kernel/lockdep.c | ||
3 | * | ||
4 | * Runtime locking correctness validator | ||
5 | * | ||
6 | * Started by Ingo Molnar: | ||
7 | * | ||
8 | * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
9 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
10 | * | ||
11 | * this code maps all the lock dependencies as they occur in a live kernel | ||
12 | * and will warn about the following classes of locking bugs: | ||
13 | * | ||
14 | * - lock inversion scenarios | ||
15 | * - circular lock dependencies | ||
16 | * - hardirq/softirq safe/unsafe locking bugs | ||
17 | * | ||
18 | * Bugs are reported even if the current locking scenario does not cause | ||
19 | * any deadlock at this point. | ||
20 | * | ||
21 | * I.e. if anytime in the past two locks were taken in a different order, | ||
22 | * even if it happened for another task, even if those were different | ||
23 | * locks (but of the same class as this lock), this code will detect it. | ||
24 | * | ||
25 | * Thanks to Arjan van de Ven for coming up with the initial idea of | ||
26 | * mapping lock dependencies at runtime. | ||
27 | */ | ||
28 | #define DISABLE_BRANCH_PROFILING | ||
29 | #include <linux/mutex.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/delay.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/proc_fs.h> | ||
34 | #include <linux/seq_file.h> | ||
35 | #include <linux/spinlock.h> | ||
36 | #include <linux/kallsyms.h> | ||
37 | #include <linux/interrupt.h> | ||
38 | #include <linux/stacktrace.h> | ||
39 | #include <linux/debug_locks.h> | ||
40 | #include <linux/irqflags.h> | ||
41 | #include <linux/utsname.h> | ||
42 | #include <linux/hash.h> | ||
43 | #include <linux/ftrace.h> | ||
44 | #include <linux/stringify.h> | ||
45 | #include <linux/bitops.h> | ||
46 | #include <linux/gfp.h> | ||
47 | #include <linux/kmemcheck.h> | ||
48 | |||
49 | #include <asm/sections.h> | ||
50 | |||
51 | #include "lockdep_internals.h" | ||
52 | |||
53 | #define CREATE_TRACE_POINTS | ||
54 | #include <trace/events/lock.h> | ||
55 | |||
56 | #ifdef CONFIG_PROVE_LOCKING | ||
57 | int prove_locking = 1; | ||
58 | module_param(prove_locking, int, 0644); | ||
59 | #else | ||
60 | #define prove_locking 0 | ||
61 | #endif | ||
62 | |||
63 | #ifdef CONFIG_LOCK_STAT | ||
64 | int lock_stat = 1; | ||
65 | module_param(lock_stat, int, 0644); | ||
66 | #else | ||
67 | #define lock_stat 0 | ||
68 | #endif | ||
69 | |||
70 | /* | ||
71 | * lockdep_lock: protects the lockdep graph, the hashes and the | ||
72 | * class/list/hash allocators. | ||
73 | * | ||
74 | * This is one of the rare exceptions where it's justified | ||
75 | * to use a raw spinlock - we really don't want the spinlock | ||
76 | * code to recurse back into the lockdep code... | ||
77 | */ | ||
78 | static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | ||
79 | |||
80 | static int graph_lock(void) | ||
81 | { | ||
82 | arch_spin_lock(&lockdep_lock); | ||
83 | /* | ||
84 | * Make sure that if another CPU detected a bug while | ||
85 | * walking the graph we don't change it (while the other | ||
86 | * CPU is busy printing out stuff with the graph lock | ||
87 | * dropped already) | ||
88 | */ | ||
89 | if (!debug_locks) { | ||
90 | arch_spin_unlock(&lockdep_lock); | ||
91 | return 0; | ||
92 | } | ||
93 | /* prevent any recursions within lockdep from causing deadlocks */ | ||
94 | current->lockdep_recursion++; | ||
95 | return 1; | ||
96 | } | ||
97 | |||
98 | static inline int graph_unlock(void) | ||
99 | { | ||
100 | if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) { | ||
101 | /* | ||
102 | * The lockdep graph lock isn't locked while we expect it to | ||
103 | * be, we're confused now, bye! | ||
104 | */ | ||
105 | return DEBUG_LOCKS_WARN_ON(1); | ||
106 | } | ||
107 | |||
108 | current->lockdep_recursion--; | ||
109 | arch_spin_unlock(&lockdep_lock); | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | /* | ||
114 | * Turn lock debugging off and return with 0 if it was off already, | ||
115 | * and also release the graph lock: | ||
116 | */ | ||
117 | static inline int debug_locks_off_graph_unlock(void) | ||
118 | { | ||
119 | int ret = debug_locks_off(); | ||
120 | |||
121 | arch_spin_unlock(&lockdep_lock); | ||
122 | |||
123 | return ret; | ||
124 | } | ||
125 | |||
126 | static int lockdep_initialized; | ||
127 | |||
128 | unsigned long nr_list_entries; | ||
129 | static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; | ||
130 | |||
131 | /* | ||
132 | * All data structures here are protected by the global debug_lock. | ||
133 | * | ||
134 | * Mutex key structs only get allocated, once during bootup, and never | ||
135 | * get freed - this significantly simplifies the debugging code. | ||
136 | */ | ||
137 | unsigned long nr_lock_classes; | ||
138 | static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; | ||
139 | |||
140 | static inline struct lock_class *hlock_class(struct held_lock *hlock) | ||
141 | { | ||
142 | if (!hlock->class_idx) { | ||
143 | /* | ||
144 | * Someone passed in garbage, we give up. | ||
145 | */ | ||
146 | DEBUG_LOCKS_WARN_ON(1); | ||
147 | return NULL; | ||
148 | } | ||
149 | return lock_classes + hlock->class_idx - 1; | ||
150 | } | ||
151 | |||
152 | #ifdef CONFIG_LOCK_STAT | ||
153 | static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], | ||
154 | cpu_lock_stats); | ||
155 | |||
156 | static inline u64 lockstat_clock(void) | ||
157 | { | ||
158 | return local_clock(); | ||
159 | } | ||
160 | |||
161 | static int lock_point(unsigned long points[], unsigned long ip) | ||
162 | { | ||
163 | int i; | ||
164 | |||
165 | for (i = 0; i < LOCKSTAT_POINTS; i++) { | ||
166 | if (points[i] == 0) { | ||
167 | points[i] = ip; | ||
168 | break; | ||
169 | } | ||
170 | if (points[i] == ip) | ||
171 | break; | ||
172 | } | ||
173 | |||
174 | return i; | ||
175 | } | ||
176 | |||
177 | static void lock_time_inc(struct lock_time *lt, u64 time) | ||
178 | { | ||
179 | if (time > lt->max) | ||
180 | lt->max = time; | ||
181 | |||
182 | if (time < lt->min || !lt->nr) | ||
183 | lt->min = time; | ||
184 | |||
185 | lt->total += time; | ||
186 | lt->nr++; | ||
187 | } | ||
188 | |||
189 | static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) | ||
190 | { | ||
191 | if (!src->nr) | ||
192 | return; | ||
193 | |||
194 | if (src->max > dst->max) | ||
195 | dst->max = src->max; | ||
196 | |||
197 | if (src->min < dst->min || !dst->nr) | ||
198 | dst->min = src->min; | ||
199 | |||
200 | dst->total += src->total; | ||
201 | dst->nr += src->nr; | ||
202 | } | ||
203 | |||
204 | struct lock_class_stats lock_stats(struct lock_class *class) | ||
205 | { | ||
206 | struct lock_class_stats stats; | ||
207 | int cpu, i; | ||
208 | |||
209 | memset(&stats, 0, sizeof(struct lock_class_stats)); | ||
210 | for_each_possible_cpu(cpu) { | ||
211 | struct lock_class_stats *pcs = | ||
212 | &per_cpu(cpu_lock_stats, cpu)[class - lock_classes]; | ||
213 | |||
214 | for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) | ||
215 | stats.contention_point[i] += pcs->contention_point[i]; | ||
216 | |||
217 | for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) | ||
218 | stats.contending_point[i] += pcs->contending_point[i]; | ||
219 | |||
220 | lock_time_add(&pcs->read_waittime, &stats.read_waittime); | ||
221 | lock_time_add(&pcs->write_waittime, &stats.write_waittime); | ||
222 | |||
223 | lock_time_add(&pcs->read_holdtime, &stats.read_holdtime); | ||
224 | lock_time_add(&pcs->write_holdtime, &stats.write_holdtime); | ||
225 | |||
226 | for (i = 0; i < ARRAY_SIZE(stats.bounces); i++) | ||
227 | stats.bounces[i] += pcs->bounces[i]; | ||
228 | } | ||
229 | |||
230 | return stats; | ||
231 | } | ||
232 | |||
233 | void clear_lock_stats(struct lock_class *class) | ||
234 | { | ||
235 | int cpu; | ||
236 | |||
237 | for_each_possible_cpu(cpu) { | ||
238 | struct lock_class_stats *cpu_stats = | ||
239 | &per_cpu(cpu_lock_stats, cpu)[class - lock_classes]; | ||
240 | |||
241 | memset(cpu_stats, 0, sizeof(struct lock_class_stats)); | ||
242 | } | ||
243 | memset(class->contention_point, 0, sizeof(class->contention_point)); | ||
244 | memset(class->contending_point, 0, sizeof(class->contending_point)); | ||
245 | } | ||
246 | |||
247 | static struct lock_class_stats *get_lock_stats(struct lock_class *class) | ||
248 | { | ||
249 | return &get_cpu_var(cpu_lock_stats)[class - lock_classes]; | ||
250 | } | ||
251 | |||
252 | static void put_lock_stats(struct lock_class_stats *stats) | ||
253 | { | ||
254 | put_cpu_var(cpu_lock_stats); | ||
255 | } | ||
256 | |||
257 | static void lock_release_holdtime(struct held_lock *hlock) | ||
258 | { | ||
259 | struct lock_class_stats *stats; | ||
260 | u64 holdtime; | ||
261 | |||
262 | if (!lock_stat) | ||
263 | return; | ||
264 | |||
265 | holdtime = lockstat_clock() - hlock->holdtime_stamp; | ||
266 | |||
267 | stats = get_lock_stats(hlock_class(hlock)); | ||
268 | if (hlock->read) | ||
269 | lock_time_inc(&stats->read_holdtime, holdtime); | ||
270 | else | ||
271 | lock_time_inc(&stats->write_holdtime, holdtime); | ||
272 | put_lock_stats(stats); | ||
273 | } | ||
274 | #else | ||
275 | static inline void lock_release_holdtime(struct held_lock *hlock) | ||
276 | { | ||
277 | } | ||
278 | #endif | ||
279 | |||
280 | /* | ||
281 | * We keep a global list of all lock classes. The list only grows, | ||
282 | * never shrinks. The list is only accessed with the lockdep | ||
283 | * spinlock lock held. | ||
284 | */ | ||
285 | LIST_HEAD(all_lock_classes); | ||
286 | |||
287 | /* | ||
288 | * The lockdep classes are in a hash-table as well, for fast lookup: | ||
289 | */ | ||
290 | #define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) | ||
291 | #define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) | ||
292 | #define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS) | ||
293 | #define classhashentry(key) (classhash_table + __classhashfn((key))) | ||
294 | |||
295 | static struct list_head classhash_table[CLASSHASH_SIZE]; | ||
296 | |||
297 | /* | ||
298 | * We put the lock dependency chains into a hash-table as well, to cache | ||
299 | * their existence: | ||
300 | */ | ||
301 | #define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) | ||
302 | #define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) | ||
303 | #define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS) | ||
304 | #define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) | ||
305 | |||
306 | static struct list_head chainhash_table[CHAINHASH_SIZE]; | ||
307 | |||
308 | /* | ||
309 | * The hash key of the lock dependency chains is a hash itself too: | ||
310 | * it's a hash of all locks taken up to that lock, including that lock. | ||
311 | * It's a 64-bit hash, because it's important for the keys to be | ||
312 | * unique. | ||
313 | */ | ||
314 | #define iterate_chain_key(key1, key2) \ | ||
315 | (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \ | ||
316 | ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \ | ||
317 | (key2)) | ||
318 | |||
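As a rough, hypothetical illustration of how this folding hash gets consumed further down in the file (example_chain_key() is not part of the patch; the real validation code folds class ids derived from hlock->class_idx in the same way):

static u64 example_chain_key(struct held_lock *held_locks, int depth)
{
	u64 chain_key = 0;
	int i;

	/*
	 * Fold each held lock's class id into the running key; a stack
	 * of N held locks thus maps to one 64-bit key identifying that
	 * exact acquisition sequence in chainhash_table[].
	 */
	for (i = 0; i < depth; i++) {
		u64 id = hlock_class(held_locks + i) - lock_classes;

		chain_key = iterate_chain_key(chain_key, id);
	}
	return chain_key;
}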
319 | void lockdep_off(void) | ||
320 | { | ||
321 | current->lockdep_recursion++; | ||
322 | } | ||
323 | EXPORT_SYMBOL(lockdep_off); | ||
324 | |||
325 | void lockdep_on(void) | ||
326 | { | ||
327 | current->lockdep_recursion--; | ||
328 | } | ||
329 | EXPORT_SYMBOL(lockdep_on); | ||
330 | |||
331 | /* | ||
332 | * Debugging switches: | ||
333 | */ | ||
334 | |||
335 | #define VERBOSE 0 | ||
336 | #define VERY_VERBOSE 0 | ||
337 | |||
338 | #if VERBOSE | ||
339 | # define HARDIRQ_VERBOSE 1 | ||
340 | # define SOFTIRQ_VERBOSE 1 | ||
341 | # define RECLAIM_VERBOSE 1 | ||
342 | #else | ||
343 | # define HARDIRQ_VERBOSE 0 | ||
344 | # define SOFTIRQ_VERBOSE 0 | ||
345 | # define RECLAIM_VERBOSE 0 | ||
346 | #endif | ||
347 | |||
348 | #if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE | ||
349 | /* | ||
350 | * Quick filtering for interesting events: | ||
351 | */ | ||
352 | static int class_filter(struct lock_class *class) | ||
353 | { | ||
354 | #if 0 | ||
355 | /* Example */ | ||
356 | if (class->name_version == 1 && | ||
357 | !strcmp(class->name, "lockname")) | ||
358 | return 1; | ||
359 | if (class->name_version == 1 && | ||
360 | !strcmp(class->name, "&struct->lockfield")) | ||
361 | return 1; | ||
362 | #endif | ||
363 | /* Filter everything else. 1 would be to allow everything else */ | ||
364 | return 0; | ||
365 | } | ||
366 | #endif | ||
367 | |||
368 | static int verbose(struct lock_class *class) | ||
369 | { | ||
370 | #if VERBOSE | ||
371 | return class_filter(class); | ||
372 | #endif | ||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | /* | ||
377 | * Stack-trace: tightly packed array of stack backtrace | ||
378 | * addresses. Protected by the graph_lock. | ||
379 | */ | ||
380 | unsigned long nr_stack_trace_entries; | ||
381 | static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; | ||
382 | |||
383 | static void print_lockdep_off(const char *bug_msg) | ||
384 | { | ||
385 | printk(KERN_DEBUG "%s\n", bug_msg); | ||
386 | printk(KERN_DEBUG "turning off the locking correctness validator.\n"); | ||
387 | printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n"); | ||
388 | } | ||
389 | |||
390 | static int save_trace(struct stack_trace *trace) | ||
391 | { | ||
392 | trace->nr_entries = 0; | ||
393 | trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; | ||
394 | trace->entries = stack_trace + nr_stack_trace_entries; | ||
395 | |||
396 | trace->skip = 3; | ||
397 | |||
398 | save_stack_trace(trace); | ||
399 | |||
400 | /* | ||
401 | * Some daft arches put -1 at the end to indicate it's a full trace. | ||
402 | * | ||
403 | * <rant> this is buggy anyway, since it takes a whole extra entry so a | ||
404 | * complete trace that maxes out the entries provided will be reported | ||
405 | * as incomplete, friggin useless </rant> | ||
406 | */ | ||
407 | if (trace->nr_entries != 0 && | ||
408 | trace->entries[trace->nr_entries-1] == ULONG_MAX) | ||
409 | trace->nr_entries--; | ||
410 | |||
411 | trace->max_entries = trace->nr_entries; | ||
412 | |||
413 | nr_stack_trace_entries += trace->nr_entries; | ||
414 | |||
415 | if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { | ||
416 | if (!debug_locks_off_graph_unlock()) | ||
417 | return 0; | ||
418 | |||
419 | print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); | ||
420 | dump_stack(); | ||
421 | |||
422 | return 0; | ||
423 | } | ||
424 | |||
425 | return 1; | ||
426 | } | ||
427 | |||
428 | unsigned int nr_hardirq_chains; | ||
429 | unsigned int nr_softirq_chains; | ||
430 | unsigned int nr_process_chains; | ||
431 | unsigned int max_lockdep_depth; | ||
432 | |||
433 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
434 | /* | ||
435 | * We cannot printk in early bootup code. Not even early_printk() | ||
436 | * might work. So we mark any initialization errors and printk | ||
437 | * about it later on, in lockdep_info(). | ||
438 | */ | ||
439 | static int lockdep_init_error; | ||
440 | static const char *lock_init_error; | ||
441 | static unsigned long lockdep_init_trace_data[20]; | ||
442 | static struct stack_trace lockdep_init_trace = { | ||
443 | .max_entries = ARRAY_SIZE(lockdep_init_trace_data), | ||
444 | .entries = lockdep_init_trace_data, | ||
445 | }; | ||
446 | |||
447 | /* | ||
448 | * Various lockdep statistics: | ||
449 | */ | ||
450 | DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats); | ||
451 | #endif | ||
452 | |||
453 | /* | ||
454 | * Locking printouts: | ||
455 | */ | ||
456 | |||
457 | #define __USAGE(__STATE) \ | ||
458 | [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W", \ | ||
459 | [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \ | ||
460 | [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\ | ||
461 | [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R", | ||
462 | |||
463 | static const char *usage_str[] = | ||
464 | { | ||
465 | #define LOCKDEP_STATE(__STATE) __USAGE(__STATE) | ||
466 | #include "lockdep_states.h" | ||
467 | #undef LOCKDEP_STATE | ||
468 | [LOCK_USED] = "INITIAL USE", | ||
469 | }; | ||
470 | |||
471 | const char * __get_key_name(struct lockdep_subclass_key *key, char *str) | ||
472 | { | ||
473 | return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); | ||
474 | } | ||
475 | |||
476 | static inline unsigned long lock_flag(enum lock_usage_bit bit) | ||
477 | { | ||
478 | return 1UL << bit; | ||
479 | } | ||
480 | |||
481 | static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit) | ||
482 | { | ||
483 | char c = '.'; | ||
484 | |||
485 | if (class->usage_mask & lock_flag(bit + 2)) | ||
486 | c = '+'; | ||
487 | if (class->usage_mask & lock_flag(bit)) { | ||
488 | c = '-'; | ||
489 | if (class->usage_mask & lock_flag(bit + 2)) | ||
490 | c = '?'; | ||
491 | } | ||
492 | |||
493 | return c; | ||
494 | } | ||
495 | |||
496 | void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS]) | ||
497 | { | ||
498 | int i = 0; | ||
499 | |||
500 | #define LOCKDEP_STATE(__STATE) \ | ||
501 | usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \ | ||
502 | usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ); | ||
503 | #include "lockdep_states.h" | ||
504 | #undef LOCKDEP_STATE | ||
505 | |||
506 | usage[i] = '\0'; | ||
507 | } | ||
508 | |||
509 | static void __print_lock_name(struct lock_class *class) | ||
510 | { | ||
511 | char str[KSYM_NAME_LEN]; | ||
512 | const char *name; | ||
513 | |||
514 | name = class->name; | ||
515 | if (!name) { | ||
516 | name = __get_key_name(class->key, str); | ||
517 | printk("%s", name); | ||
518 | } else { | ||
519 | printk("%s", name); | ||
520 | if (class->name_version > 1) | ||
521 | printk("#%d", class->name_version); | ||
522 | if (class->subclass) | ||
523 | printk("/%d", class->subclass); | ||
524 | } | ||
525 | } | ||
526 | |||
527 | static void print_lock_name(struct lock_class *class) | ||
528 | { | ||
529 | char usage[LOCK_USAGE_CHARS]; | ||
530 | |||
531 | get_usage_chars(class, usage); | ||
532 | |||
533 | printk(" ("); | ||
534 | __print_lock_name(class); | ||
535 | printk("){%s}", usage); | ||
536 | } | ||
537 | |||
538 | static void print_lockdep_cache(struct lockdep_map *lock) | ||
539 | { | ||
540 | const char *name; | ||
541 | char str[KSYM_NAME_LEN]; | ||
542 | |||
543 | name = lock->name; | ||
544 | if (!name) | ||
545 | name = __get_key_name(lock->key->subkeys, str); | ||
546 | |||
547 | printk("%s", name); | ||
548 | } | ||
549 | |||
550 | static void print_lock(struct held_lock *hlock) | ||
551 | { | ||
552 | print_lock_name(hlock_class(hlock)); | ||
553 | printk(", at: "); | ||
554 | print_ip_sym(hlock->acquire_ip); | ||
555 | } | ||
556 | |||
557 | static void lockdep_print_held_locks(struct task_struct *curr) | ||
558 | { | ||
559 | int i, depth = curr->lockdep_depth; | ||
560 | |||
561 | if (!depth) { | ||
562 | printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr)); | ||
563 | return; | ||
564 | } | ||
565 | printk("%d lock%s held by %s/%d:\n", | ||
566 | depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr)); | ||
567 | |||
568 | for (i = 0; i < depth; i++) { | ||
569 | printk(" #%d: ", i); | ||
570 | print_lock(curr->held_locks + i); | ||
571 | } | ||
572 | } | ||
573 | |||
574 | static void print_kernel_ident(void) | ||
575 | { | ||
576 | printk("%s %.*s %s\n", init_utsname()->release, | ||
577 | (int)strcspn(init_utsname()->version, " "), | ||
578 | init_utsname()->version, | ||
579 | print_tainted()); | ||
580 | } | ||
581 | |||
582 | static int very_verbose(struct lock_class *class) | ||
583 | { | ||
584 | #if VERY_VERBOSE | ||
585 | return class_filter(class); | ||
586 | #endif | ||
587 | return 0; | ||
588 | } | ||
589 | |||
590 | /* | ||
591 | * Is this the address of a static object: | ||
592 | */ | ||
593 | static int static_obj(void *obj) | ||
594 | { | ||
595 | unsigned long start = (unsigned long) &_stext, | ||
596 | end = (unsigned long) &_end, | ||
597 | addr = (unsigned long) obj; | ||
598 | |||
599 | /* | ||
600 | * static variable? | ||
601 | */ | ||
602 | if ((addr >= start) && (addr < end)) | ||
603 | return 1; | ||
604 | |||
605 | if (arch_is_kernel_data(addr)) | ||
606 | return 1; | ||
607 | |||
608 | /* | ||
609 | * in-kernel percpu var? | ||
610 | */ | ||
611 | if (is_kernel_percpu_address(addr)) | ||
612 | return 1; | ||
613 | |||
614 | /* | ||
615 | * module static or percpu var? | ||
616 | */ | ||
617 | return is_module_address(addr) || is_module_percpu_address(addr); | ||
618 | } | ||
619 | |||
620 | /* | ||
621 | * To make lock name printouts unique, we calculate a unique | ||
622 | * class->name_version generation counter: | ||
623 | */ | ||
624 | static int count_matching_names(struct lock_class *new_class) | ||
625 | { | ||
626 | struct lock_class *class; | ||
627 | int count = 0; | ||
628 | |||
629 | if (!new_class->name) | ||
630 | return 0; | ||
631 | |||
632 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | ||
633 | if (new_class->key - new_class->subclass == class->key) | ||
634 | return class->name_version; | ||
635 | if (class->name && !strcmp(class->name, new_class->name)) | ||
636 | count = max(count, class->name_version); | ||
637 | } | ||
638 | |||
639 | return count + 1; | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * Register a lock's class in the hash-table, if the class is not present | ||
644 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
645 | * itself, so actual lookup of the hash should be once per lock object. | ||
646 | */ | ||
647 | static inline struct lock_class * | ||
648 | look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | ||
649 | { | ||
650 | struct lockdep_subclass_key *key; | ||
651 | struct list_head *hash_head; | ||
652 | struct lock_class *class; | ||
653 | |||
654 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
655 | /* | ||
656 | * If the architecture calls into lockdep before initializing | ||
657 | * the hashes then we'll warn about it later. (we cannot printk | ||
658 | * right now) | ||
659 | */ | ||
660 | if (unlikely(!lockdep_initialized)) { | ||
661 | lockdep_init(); | ||
662 | lockdep_init_error = 1; | ||
663 | lock_init_error = lock->name; | ||
664 | save_stack_trace(&lockdep_init_trace); | ||
665 | } | ||
666 | #endif | ||
667 | |||
668 | if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { | ||
669 | debug_locks_off(); | ||
670 | printk(KERN_ERR | ||
671 | "BUG: looking up invalid subclass: %u\n", subclass); | ||
672 | printk(KERN_ERR | ||
673 | "turning off the locking correctness validator.\n"); | ||
674 | dump_stack(); | ||
675 | return NULL; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * Static locks do not have their class-keys yet - for them the key | ||
680 | * is the lock object itself: | ||
681 | */ | ||
682 | if (unlikely(!lock->key)) | ||
683 | lock->key = (void *)lock; | ||
684 | |||
685 | /* | ||
686 | * NOTE: the class-key must be unique. For dynamic locks, a static | ||
687 | * lock_class_key variable is passed in through the mutex_init() | ||
688 | * (or spin_lock_init()) call - which acts as the key. For static | ||
689 | * locks we use the lock object itself as the key. | ||
690 | */ | ||
691 | BUILD_BUG_ON(sizeof(struct lock_class_key) > | ||
692 | sizeof(struct lockdep_map)); | ||
693 | |||
694 | key = lock->key->subkeys + subclass; | ||
695 | |||
696 | hash_head = classhashentry(key); | ||
697 | |||
698 | /* | ||
699 | * We can walk the hash lockfree, because the hash only | ||
700 | * grows, and we are careful when adding entries to the end: | ||
701 | */ | ||
702 | list_for_each_entry(class, hash_head, hash_entry) { | ||
703 | if (class->key == key) { | ||
704 | /* | ||
705 | * Huh! same key, different name? Did someone trample | ||
706 | * on some memory? We're most confused. | ||
707 | */ | ||
708 | WARN_ON_ONCE(class->name != lock->name); | ||
709 | return class; | ||
710 | } | ||
711 | } | ||
712 | |||
713 | return NULL; | ||
714 | } | ||
715 | |||
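A small, hypothetical sketch of the key rules spelled out in the comment above (the example_* names are invented): spin_lock_init()/mutex_init() supply a static per-callsite lock_class_key automatically, lockdep_set_class() can substitute an explicitly shared key, and the static_obj() check in register_lock_class() below rejects keys that are not persistent.

#include <linux/spinlock.h>
#include <linux/lockdep.h>

/* The key must live in static storage so that static_obj() accepts it. */
static struct lock_class_key example_dev_lock_key;

struct example_dev {
	spinlock_t lock;
};

static void example_dev_init(struct example_dev *dev)
{
	/* spin_lock_init() passes a static key tied to this call site... */
	spin_lock_init(&dev->lock);
	/* ...and lockdep_set_class() re-keys the lock to a shared class. */
	lockdep_set_class(&dev->lock, &example_dev_lock_key);
}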
716 | /* | ||
717 | * Register a lock's class in the hash-table, if the class is not present | ||
718 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
719 | * itself, so actual lookup of the hash should be once per lock object. | ||
720 | */ | ||
721 | static inline struct lock_class * | ||
722 | register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | ||
723 | { | ||
724 | struct lockdep_subclass_key *key; | ||
725 | struct list_head *hash_head; | ||
726 | struct lock_class *class; | ||
727 | unsigned long flags; | ||
728 | |||
729 | class = look_up_lock_class(lock, subclass); | ||
730 | if (likely(class)) | ||
731 | goto out_set_class_cache; | ||
732 | |||
733 | /* | ||
734 | * Debug-check: all keys must be persistent! | ||
735 | */ | ||
736 | if (!static_obj(lock->key)) { | ||
737 | debug_locks_off(); | ||
738 | printk("INFO: trying to register non-static key.\n"); | ||
739 | printk("the code is fine but needs lockdep annotation.\n"); | ||
740 | printk("turning off the locking correctness validator.\n"); | ||
741 | dump_stack(); | ||
742 | |||
743 | return NULL; | ||
744 | } | ||
745 | |||
746 | key = lock->key->subkeys + subclass; | ||
747 | hash_head = classhashentry(key); | ||
748 | |||
749 | raw_local_irq_save(flags); | ||
750 | if (!graph_lock()) { | ||
751 | raw_local_irq_restore(flags); | ||
752 | return NULL; | ||
753 | } | ||
754 | /* | ||
755 | * We have to do the hash-walk again, to avoid races | ||
756 | * with another CPU: | ||
757 | */ | ||
758 | list_for_each_entry(class, hash_head, hash_entry) | ||
759 | if (class->key == key) | ||
760 | goto out_unlock_set; | ||
761 | /* | ||
762 | * Allocate a new key from the static array, and add it to | ||
763 | * the hash: | ||
764 | */ | ||
765 | if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { | ||
766 | if (!debug_locks_off_graph_unlock()) { | ||
767 | raw_local_irq_restore(flags); | ||
768 | return NULL; | ||
769 | } | ||
770 | raw_local_irq_restore(flags); | ||
771 | |||
772 | print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!"); | ||
773 | dump_stack(); | ||
774 | return NULL; | ||
775 | } | ||
776 | class = lock_classes + nr_lock_classes++; | ||
777 | debug_atomic_inc(nr_unused_locks); | ||
778 | class->key = key; | ||
779 | class->name = lock->name; | ||
780 | class->subclass = subclass; | ||
781 | INIT_LIST_HEAD(&class->lock_entry); | ||
782 | INIT_LIST_HEAD(&class->locks_before); | ||
783 | INIT_LIST_HEAD(&class->locks_after); | ||
784 | class->name_version = count_matching_names(class); | ||
785 | /* | ||
786 | * We use RCU's safe list-add method to make | ||
787 | * parallel walking of the hash-list safe: | ||
788 | */ | ||
789 | list_add_tail_rcu(&class->hash_entry, hash_head); | ||
790 | /* | ||
791 | * Add it to the global list of classes: | ||
792 | */ | ||
793 | list_add_tail_rcu(&class->lock_entry, &all_lock_classes); | ||
794 | |||
795 | if (verbose(class)) { | ||
796 | graph_unlock(); | ||
797 | raw_local_irq_restore(flags); | ||
798 | |||
799 | printk("\nnew class %p: %s", class->key, class->name); | ||
800 | if (class->name_version > 1) | ||
801 | printk("#%d", class->name_version); | ||
802 | printk("\n"); | ||
803 | dump_stack(); | ||
804 | |||
805 | raw_local_irq_save(flags); | ||
806 | if (!graph_lock()) { | ||
807 | raw_local_irq_restore(flags); | ||
808 | return NULL; | ||
809 | } | ||
810 | } | ||
811 | out_unlock_set: | ||
812 | graph_unlock(); | ||
813 | raw_local_irq_restore(flags); | ||
814 | |||
815 | out_set_class_cache: | ||
816 | if (!subclass || force) | ||
817 | lock->class_cache[0] = class; | ||
818 | else if (subclass < NR_LOCKDEP_CACHING_CLASSES) | ||
819 | lock->class_cache[subclass] = class; | ||
820 | |||
821 | /* | ||
822 | * Hash collision, did we smoke some? We found a class with a matching | ||
823 | * hash but the subclass -- which is hashed in -- didn't match. | ||
824 | */ | ||
825 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) | ||
826 | return NULL; | ||
827 | |||
828 | return class; | ||
829 | } | ||
830 | |||
831 | #ifdef CONFIG_PROVE_LOCKING | ||
832 | /* | ||
833 | * Allocate a lockdep entry. (assumes the graph_lock held, returns | ||
834 | * with NULL on failure) | ||
835 | */ | ||
836 | static struct lock_list *alloc_list_entry(void) | ||
837 | { | ||
838 | if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { | ||
839 | if (!debug_locks_off_graph_unlock()) | ||
840 | return NULL; | ||
841 | |||
842 | print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!"); | ||
843 | dump_stack(); | ||
844 | return NULL; | ||
845 | } | ||
846 | return list_entries + nr_list_entries++; | ||
847 | } | ||
848 | |||
849 | /* | ||
850 | * Add a new dependency to the head of the list: | ||
851 | */ | ||
852 | static int add_lock_to_list(struct lock_class *class, struct lock_class *this, | ||
853 | struct list_head *head, unsigned long ip, | ||
854 | int distance, struct stack_trace *trace) | ||
855 | { | ||
856 | struct lock_list *entry; | ||
857 | /* | ||
858 | * Lock not present yet - get a new dependency struct and | ||
859 | * add it to the list: | ||
860 | */ | ||
861 | entry = alloc_list_entry(); | ||
862 | if (!entry) | ||
863 | return 0; | ||
864 | |||
865 | entry->class = this; | ||
866 | entry->distance = distance; | ||
867 | entry->trace = *trace; | ||
868 | /* | ||
869 | * Since we never remove from the dependency list, the list can | ||
870 | * be walked lockless by other CPUs, it's only allocation | ||
871 | * that must be protected by the spinlock. But this also means | ||
872 | * we must make new entries visible only once writes to the | ||
873 | * entry become visible - hence the RCU op: | ||
874 | */ | ||
875 | list_add_tail_rcu(&entry->entry, head); | ||
876 | |||
877 | return 1; | ||
878 | } | ||
879 | |||
880 | /* | ||
881 | * For good efficiency of the modulo operation, we use a power of 2 | ||
882 | */ | ||
883 | #define MAX_CIRCULAR_QUEUE_SIZE 4096UL | ||
884 | #define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1) | ||
885 | |||
886 | /* | ||
887 | * The circular_queue and its helpers are used to implement the | ||
888 | * breadth-first search (BFS) algorithm, by which we can build | ||
889 | * the shortest path from the next lock to be acquired back to a | ||
890 | * previously held lock if there is a circular dependency between them. | ||
891 | */ | ||
892 | struct circular_queue { | ||
893 | unsigned long element[MAX_CIRCULAR_QUEUE_SIZE]; | ||
894 | unsigned int front, rear; | ||
895 | }; | ||
896 | |||
897 | static struct circular_queue lock_cq; | ||
898 | |||
899 | unsigned int max_bfs_queue_depth; | ||
900 | |||
901 | static unsigned int lockdep_dependency_gen_id; | ||
902 | |||
903 | static inline void __cq_init(struct circular_queue *cq) | ||
904 | { | ||
905 | cq->front = cq->rear = 0; | ||
906 | lockdep_dependency_gen_id++; | ||
907 | } | ||
908 | |||
909 | static inline int __cq_empty(struct circular_queue *cq) | ||
910 | { | ||
911 | return (cq->front == cq->rear); | ||
912 | } | ||
913 | |||
914 | static inline int __cq_full(struct circular_queue *cq) | ||
915 | { | ||
916 | return ((cq->rear + 1) & CQ_MASK) == cq->front; | ||
917 | } | ||
918 | |||
919 | static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) | ||
920 | { | ||
921 | if (__cq_full(cq)) | ||
922 | return -1; | ||
923 | |||
924 | cq->element[cq->rear] = elem; | ||
925 | cq->rear = (cq->rear + 1) & CQ_MASK; | ||
926 | return 0; | ||
927 | } | ||
928 | |||
929 | static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) | ||
930 | { | ||
931 | if (__cq_empty(cq)) | ||
932 | return -1; | ||
933 | |||
934 | *elem = cq->element[cq->front]; | ||
935 | cq->front = (cq->front + 1) & CQ_MASK; | ||
936 | return 0; | ||
937 | } | ||
938 | |||
939 | static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) | ||
940 | { | ||
941 | return (cq->rear - cq->front) & CQ_MASK; | ||
942 | } | ||
943 | |||
944 | static inline void mark_lock_accessed(struct lock_list *lock, | ||
945 | struct lock_list *parent) | ||
946 | { | ||
947 | unsigned long nr; | ||
948 | |||
949 | nr = lock - list_entries; | ||
950 | WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */ | ||
951 | lock->parent = parent; | ||
952 | lock->class->dep_gen_id = lockdep_dependency_gen_id; | ||
953 | } | ||
954 | |||
955 | static inline unsigned long lock_accessed(struct lock_list *lock) | ||
956 | { | ||
957 | unsigned long nr; | ||
958 | |||
959 | nr = lock - list_entries; | ||
960 | WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */ | ||
961 | return lock->class->dep_gen_id == lockdep_dependency_gen_id; | ||
962 | } | ||
963 | |||
964 | static inline struct lock_list *get_lock_parent(struct lock_list *child) | ||
965 | { | ||
966 | return child->parent; | ||
967 | } | ||
968 | |||
969 | static inline int get_lock_depth(struct lock_list *child) | ||
970 | { | ||
971 | int depth = 0; | ||
972 | struct lock_list *parent; | ||
973 | |||
974 | while ((parent = get_lock_parent(child))) { | ||
975 | child = parent; | ||
976 | depth++; | ||
977 | } | ||
978 | return depth; | ||
979 | } | ||
980 | |||
981 | static int __bfs(struct lock_list *source_entry, | ||
982 | void *data, | ||
983 | int (*match)(struct lock_list *entry, void *data), | ||
984 | struct lock_list **target_entry, | ||
985 | int forward) | ||
986 | { | ||
987 | struct lock_list *entry; | ||
988 | struct list_head *head; | ||
989 | struct circular_queue *cq = &lock_cq; | ||
990 | int ret = 1; | ||
991 | |||
992 | if (match(source_entry, data)) { | ||
993 | *target_entry = source_entry; | ||
994 | ret = 0; | ||
995 | goto exit; | ||
996 | } | ||
997 | |||
998 | if (forward) | ||
999 | head = &source_entry->class->locks_after; | ||
1000 | else | ||
1001 | head = &source_entry->class->locks_before; | ||
1002 | |||
1003 | if (list_empty(head)) | ||
1004 | goto exit; | ||
1005 | |||
1006 | __cq_init(cq); | ||
1007 | __cq_enqueue(cq, (unsigned long)source_entry); | ||
1008 | |||
1009 | while (!__cq_empty(cq)) { | ||
1010 | struct lock_list *lock; | ||
1011 | |||
1012 | __cq_dequeue(cq, (unsigned long *)&lock); | ||
1013 | |||
1014 | if (!lock->class) { | ||
1015 | ret = -2; | ||
1016 | goto exit; | ||
1017 | } | ||
1018 | |||
1019 | if (forward) | ||
1020 | head = &lock->class->locks_after; | ||
1021 | else | ||
1022 | head = &lock->class->locks_before; | ||
1023 | |||
1024 | list_for_each_entry(entry, head, entry) { | ||
1025 | if (!lock_accessed(entry)) { | ||
1026 | unsigned int cq_depth; | ||
1027 | mark_lock_accessed(entry, lock); | ||
1028 | if (match(entry, data)) { | ||
1029 | *target_entry = entry; | ||
1030 | ret = 0; | ||
1031 | goto exit; | ||
1032 | } | ||
1033 | |||
1034 | if (__cq_enqueue(cq, (unsigned long)entry)) { | ||
1035 | ret = -1; | ||
1036 | goto exit; | ||
1037 | } | ||
1038 | cq_depth = __cq_get_elem_count(cq); | ||
1039 | if (max_bfs_queue_depth < cq_depth) | ||
1040 | max_bfs_queue_depth = cq_depth; | ||
1041 | } | ||
1042 | } | ||
1043 | } | ||
1044 | exit: | ||
1045 | return ret; | ||
1046 | } | ||
1047 | |||
1048 | static inline int __bfs_forwards(struct lock_list *src_entry, | ||
1049 | void *data, | ||
1050 | int (*match)(struct lock_list *entry, void *data), | ||
1051 | struct lock_list **target_entry) | ||
1052 | { | ||
1053 | return __bfs(src_entry, data, match, target_entry, 1); | ||
1054 | |||
1055 | } | ||
1056 | |||
1057 | static inline int __bfs_backwards(struct lock_list *src_entry, | ||
1058 | void *data, | ||
1059 | int (*match)(struct lock_list *entry, void *data), | ||
1060 | struct lock_list **target_entry) | ||
1061 | { | ||
1062 | return __bfs(src_entry, data, match, target_entry, 0); | ||
1063 | |||
1064 | } | ||
1065 | |||
1066 | /* | ||
1067 | * Recursive, forwards-direction lock-dependency checking, used for | ||
1068 | * both noncyclic checking and for hardirq-unsafe/softirq-unsafe | ||
1069 | * checking. | ||
1070 | */ | ||
1071 | |||
1072 | /* | ||
1073 | * Print a dependency chain entry (this is only done when a deadlock | ||
1074 | * has been detected): | ||
1075 | */ | ||
1076 | static noinline int | ||
1077 | print_circular_bug_entry(struct lock_list *target, int depth) | ||
1078 | { | ||
1079 | if (debug_locks_silent) | ||
1080 | return 0; | ||
1081 | printk("\n-> #%u", depth); | ||
1082 | print_lock_name(target->class); | ||
1083 | printk(":\n"); | ||
1084 | print_stack_trace(&target->trace, 6); | ||
1085 | |||
1086 | return 0; | ||
1087 | } | ||
1088 | |||
1089 | static void | ||
1090 | print_circular_lock_scenario(struct held_lock *src, | ||
1091 | struct held_lock *tgt, | ||
1092 | struct lock_list *prt) | ||
1093 | { | ||
1094 | struct lock_class *source = hlock_class(src); | ||
1095 | struct lock_class *target = hlock_class(tgt); | ||
1096 | struct lock_class *parent = prt->class; | ||
1097 | |||
1098 | /* | ||
1099 | * A direct locking problem where unsafe_class lock is taken | ||
1100 | * directly by safe_class lock, then all we need to show | ||
1101 | * is the deadlock scenario, as it is obvious that the | ||
1102 | * unsafe lock is taken under the safe lock. | ||
1103 | * | ||
1104 | * But if there is a chain instead, where the safe lock takes | ||
1105 | * an intermediate lock (middle_class) where this lock is | ||
1106 | * not the same as the safe lock, then the lock chain is | ||
1107 | * used to describe the problem. Otherwise we would need | ||
1108 | * to show a different CPU case for each link in the chain | ||
1109 | * from the safe_class lock to the unsafe_class lock. | ||
1110 | */ | ||
1111 | if (parent != source) { | ||
1112 | printk("Chain exists of:\n "); | ||
1113 | __print_lock_name(source); | ||
1114 | printk(" --> "); | ||
1115 | __print_lock_name(parent); | ||
1116 | printk(" --> "); | ||
1117 | __print_lock_name(target); | ||
1118 | printk("\n\n"); | ||
1119 | } | ||
1120 | |||
1121 | printk(" Possible unsafe locking scenario:\n\n"); | ||
1122 | printk(" CPU0 CPU1\n"); | ||
1123 | printk(" ---- ----\n"); | ||
1124 | printk(" lock("); | ||
1125 | __print_lock_name(target); | ||
1126 | printk(");\n"); | ||
1127 | printk(" lock("); | ||
1128 | __print_lock_name(parent); | ||
1129 | printk(");\n"); | ||
1130 | printk(" lock("); | ||
1131 | __print_lock_name(target); | ||
1132 | printk(");\n"); | ||
1133 | printk(" lock("); | ||
1134 | __print_lock_name(source); | ||
1135 | printk(");\n"); | ||
1136 | printk("\n *** DEADLOCK ***\n\n"); | ||
1137 | } | ||
1138 | |||
1139 | /* | ||
1140 | * When a circular dependency is detected, print the | ||
1141 | * header first: | ||
1142 | */ | ||
1143 | static noinline int | ||
1144 | print_circular_bug_header(struct lock_list *entry, unsigned int depth, | ||
1145 | struct held_lock *check_src, | ||
1146 | struct held_lock *check_tgt) | ||
1147 | { | ||
1148 | struct task_struct *curr = current; | ||
1149 | |||
1150 | if (debug_locks_silent) | ||
1151 | return 0; | ||
1152 | |||
1153 | printk("\n"); | ||
1154 | printk("======================================================\n"); | ||
1155 | printk("[ INFO: possible circular locking dependency detected ]\n"); | ||
1156 | print_kernel_ident(); | ||
1157 | printk("-------------------------------------------------------\n"); | ||
1158 | printk("%s/%d is trying to acquire lock:\n", | ||
1159 | curr->comm, task_pid_nr(curr)); | ||
1160 | print_lock(check_src); | ||
1161 | printk("\nbut task is already holding lock:\n"); | ||
1162 | print_lock(check_tgt); | ||
1163 | printk("\nwhich lock already depends on the new lock.\n\n"); | ||
1164 | printk("\nthe existing dependency chain (in reverse order) is:\n"); | ||
1165 | |||
1166 | print_circular_bug_entry(entry, depth); | ||
1167 | |||
1168 | return 0; | ||
1169 | } | ||
1170 | |||
1171 | static inline int class_equal(struct lock_list *entry, void *data) | ||
1172 | { | ||
1173 | return entry->class == data; | ||
1174 | } | ||
1175 | |||
1176 | static noinline int print_circular_bug(struct lock_list *this, | ||
1177 | struct lock_list *target, | ||
1178 | struct held_lock *check_src, | ||
1179 | struct held_lock *check_tgt) | ||
1180 | { | ||
1181 | struct task_struct *curr = current; | ||
1182 | struct lock_list *parent; | ||
1183 | struct lock_list *first_parent; | ||
1184 | int depth; | ||
1185 | |||
1186 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||
1187 | return 0; | ||
1188 | |||
1189 | if (!save_trace(&this->trace)) | ||
1190 | return 0; | ||
1191 | |||
1192 | depth = get_lock_depth(target); | ||
1193 | |||
1194 | print_circular_bug_header(target, depth, check_src, check_tgt); | ||
1195 | |||
1196 | parent = get_lock_parent(target); | ||
1197 | first_parent = parent; | ||
1198 | |||
1199 | while (parent) { | ||
1200 | print_circular_bug_entry(parent, --depth); | ||
1201 | parent = get_lock_parent(parent); | ||
1202 | } | ||
1203 | |||
1204 | printk("\nother info that might help us debug this:\n\n"); | ||
1205 | print_circular_lock_scenario(check_src, check_tgt, | ||
1206 | first_parent); | ||
1207 | |||
1208 | lockdep_print_held_locks(curr); | ||
1209 | |||
1210 | printk("\nstack backtrace:\n"); | ||
1211 | dump_stack(); | ||
1212 | |||
1213 | return 0; | ||
1214 | } | ||
1215 | |||
1216 | static noinline int print_bfs_bug(int ret) | ||
1217 | { | ||
1218 | if (!debug_locks_off_graph_unlock()) | ||
1219 | return 0; | ||
1220 | |||
1221 | /* | ||
1222 | * Breadth-first-search failed, graph got corrupted? | ||
1223 | */ | ||
1224 | WARN(1, "lockdep bfs error:%d\n", ret); | ||
1225 | |||
1226 | return 0; | ||
1227 | } | ||
1228 | |||
1229 | static int noop_count(struct lock_list *entry, void *data) | ||
1230 | { | ||
1231 | (*(unsigned long *)data)++; | ||
1232 | return 0; | ||
1233 | } | ||
1234 | |||
1235 | static unsigned long __lockdep_count_forward_deps(struct lock_list *this) | ||
1236 | { | ||
1237 | unsigned long count = 0; | ||
1238 | struct lock_list *uninitialized_var(target_entry); | ||
1239 | |||
1240 | __bfs_forwards(this, (void *)&count, noop_count, &target_entry); | ||
1241 | |||
1242 | return count; | ||
1243 | } | ||
1244 | unsigned long lockdep_count_forward_deps(struct lock_class *class) | ||
1245 | { | ||
1246 | unsigned long ret, flags; | ||
1247 | struct lock_list this; | ||
1248 | |||
1249 | this.parent = NULL; | ||
1250 | this.class = class; | ||
1251 | |||
1252 | local_irq_save(flags); | ||
1253 | arch_spin_lock(&lockdep_lock); | ||
1254 | ret = __lockdep_count_forward_deps(&this); | ||
1255 | arch_spin_unlock(&lockdep_lock); | ||
1256 | local_irq_restore(flags); | ||
1257 | |||
1258 | return ret; | ||
1259 | } | ||
1260 | |||
1261 | static unsigned long __lockdep_count_backward_deps(struct lock_list *this) | ||
1262 | { | ||
1263 | unsigned long count = 0; | ||
1264 | struct lock_list *uninitialized_var(target_entry); | ||
1265 | |||
1266 | __bfs_backwards(this, (void *)&count, noop_count, &target_entry); | ||
1267 | |||
1268 | return count; | ||
1269 | } | ||
1270 | |||
1271 | unsigned long lockdep_count_backward_deps(struct lock_class *class) | ||
1272 | { | ||
1273 | unsigned long ret, flags; | ||
1274 | struct lock_list this; | ||
1275 | |||
1276 | this.parent = NULL; | ||
1277 | this.class = class; | ||
1278 | |||
1279 | local_irq_save(flags); | ||
1280 | arch_spin_lock(&lockdep_lock); | ||
1281 | ret = __lockdep_count_backward_deps(&this); | ||
1282 | arch_spin_unlock(&lockdep_lock); | ||
1283 | local_irq_restore(flags); | ||
1284 | |||
1285 | return ret; | ||
1286 | } | ||
1287 | |||
1288 | /* | ||
1289 | * Prove that the dependency graph starting at <entry> can not | ||
1290 | * lead to <target>. Print an error and return 0 if it does. | ||
1291 | */ | ||
1292 | static noinline int | ||
1293 | check_noncircular(struct lock_list *root, struct lock_class *target, | ||
1294 | struct lock_list **target_entry) | ||
1295 | { | ||
1296 | int result; | ||
1297 | |||
1298 | debug_atomic_inc(nr_cyclic_checks); | ||
1299 | |||
1300 | result = __bfs_forwards(root, target, class_equal, target_entry); | ||
1301 | |||
1302 | return result; | ||
1303 | } | ||
1304 | |||
1305 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) | ||
1306 | /* | ||
1307 | * Forwards and backwards subgraph searching, for the purposes of | ||
1308 | * proving that two subgraphs can be connected by a new dependency | ||
1309 | * without creating any illegal irq-safe -> irq-unsafe lock dependency. | ||
1310 | */ | ||
1311 | |||
1312 | static inline int usage_match(struct lock_list *entry, void *bit) | ||
1313 | { | ||
1314 | return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit); | ||
1315 | } | ||
1316 | |||
1317 | |||
1318 | |||
1319 | /* | ||
1320 | * Find a node in the forwards-direction dependency sub-graph starting | ||
1321 | * at @root->class that matches @bit. | ||
1322 | * | ||
1323 | * Return 0 if such a node exists in the subgraph, and put that node | ||
1324 | * into *@target_entry. | ||
1325 | * | ||
1326 | * Return 1 otherwise and keep *@target_entry unchanged. | ||
1327 | * Return <0 on error. | ||
1328 | */ | ||
1329 | static int | ||
1330 | find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, | ||
1331 | struct lock_list **target_entry) | ||
1332 | { | ||
1333 | int result; | ||
1334 | |||
1335 | debug_atomic_inc(nr_find_usage_forwards_checks); | ||
1336 | |||
1337 | result = __bfs_forwards(root, (void *)bit, usage_match, target_entry); | ||
1338 | |||
1339 | return result; | ||
1340 | } | ||
1341 | |||
1342 | /* | ||
1343 | * Find a node in the backwards-direction dependency sub-graph starting | ||
1344 | * at @root->class that matches @bit. | ||
1345 | * | ||
1346 | * Return 0 if such a node exists in the subgraph, and put that node | ||
1347 | * into *@target_entry. | ||
1348 | * | ||
1349 | * Return 1 otherwise and keep *@target_entry unchanged. | ||
1350 | * Return <0 on error. | ||
1351 | */ | ||
1352 | static int | ||
1353 | find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, | ||
1354 | struct lock_list **target_entry) | ||
1355 | { | ||
1356 | int result; | ||
1357 | |||
1358 | debug_atomic_inc(nr_find_usage_backwards_checks); | ||
1359 | |||
1360 | result = __bfs_backwards(root, (void *)bit, usage_match, target_entry); | ||
1361 | |||
1362 | return result; | ||
1363 | } | ||
1364 | |||
1365 | static void print_lock_class_header(struct lock_class *class, int depth) | ||
1366 | { | ||
1367 | int bit; | ||
1368 | |||
1369 | printk("%*s->", depth, ""); | ||
1370 | print_lock_name(class); | ||
1371 | printk(" ops: %lu", class->ops); | ||
1372 | printk(" {\n"); | ||
1373 | |||
1374 | for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { | ||
1375 | if (class->usage_mask & (1 << bit)) { | ||
1376 | int len = depth; | ||
1377 | |||
1378 | len += printk("%*s %s", depth, "", usage_str[bit]); | ||
1379 | len += printk(" at:\n"); | ||
1380 | print_stack_trace(class->usage_traces + bit, len); | ||
1381 | } | ||
1382 | } | ||
1383 | printk("%*s }\n", depth, ""); | ||
1384 | |||
1385 | printk("%*s ... key at: ",depth,""); | ||
1386 | print_ip_sym((unsigned long)class->key); | ||
1387 | } | ||
1388 | |||
1389 | /* | ||
1390 | * printk the shortest lock dependencies from @start to @end in reverse order: | ||
1391 | */ | ||
1392 | static void __used | ||
1393 | print_shortest_lock_dependencies(struct lock_list *leaf, | ||
1394 | struct lock_list *root) | ||
1395 | { | ||
1396 | struct lock_list *entry = leaf; | ||
1397 | int depth; | ||
1398 | |||
1399 | /*compute depth from generated tree by BFS*/ | ||
1400 | depth = get_lock_depth(leaf); | ||
1401 | |||
1402 | do { | ||
1403 | print_lock_class_header(entry->class, depth); | ||
1404 | printk("%*s ... acquired at:\n", depth, ""); | ||
1405 | print_stack_trace(&entry->trace, 2); | ||
1406 | printk("\n"); | ||
1407 | |||
1408 | if (depth == 0 && (entry != root)) { | ||
1409 | printk("lockdep:%s bad path found in chain graph\n", __func__); | ||
1410 | break; | ||
1411 | } | ||
1412 | |||
1413 | entry = get_lock_parent(entry); | ||
1414 | depth--; | ||
1415 | } while (entry && (depth >= 0)); | ||
1416 | |||
1417 | return; | ||
1418 | } | ||
1419 | |||
1420 | static void | ||
1421 | print_irq_lock_scenario(struct lock_list *safe_entry, | ||
1422 | struct lock_list *unsafe_entry, | ||
1423 | struct lock_class *prev_class, | ||
1424 | struct lock_class *next_class) | ||
1425 | { | ||
1426 | struct lock_class *safe_class = safe_entry->class; | ||
1427 | struct lock_class *unsafe_class = unsafe_entry->class; | ||
1428 | struct lock_class *middle_class = prev_class; | ||
1429 | |||
1430 | if (middle_class == safe_class) | ||
1431 | middle_class = next_class; | ||
1432 | |||
1433 | /* | ||
1434 | * A direct locking problem where unsafe_class lock is taken | ||
1435 | * directly by safe_class lock, then all we need to show | ||
1436 | * is the deadlock scenario, as it is obvious that the | ||
1437 | * unsafe lock is taken under the safe lock. | ||
1438 | * | ||
1439 | * But if there is a chain instead, where the safe lock takes | ||
1440 | * an intermediate lock (middle_class) where this lock is | ||
1441 | * not the same as the safe lock, then the lock chain is | ||
1442 | * used to describe the problem. Otherwise we would need | ||
1443 | * to show a different CPU case for each link in the chain | ||
1444 | * from the safe_class lock to the unsafe_class lock. | ||
1445 | */ | ||
1446 | if (middle_class != unsafe_class) { | ||
1447 | printk("Chain exists of:\n "); | ||
1448 | __print_lock_name(safe_class); | ||
1449 | printk(" --> "); | ||
1450 | __print_lock_name(middle_class); | ||
1451 | printk(" --> "); | ||
1452 | __print_lock_name(unsafe_class); | ||
1453 | printk("\n\n"); | ||
1454 | } | ||
1455 | |||
1456 | printk(" Possible interrupt unsafe locking scenario:\n\n"); | ||
1457 | printk(" CPU0 CPU1\n"); | ||
1458 | printk(" ---- ----\n"); | ||
1459 | printk(" lock("); | ||
1460 | __print_lock_name(unsafe_class); | ||
1461 | printk(");\n"); | ||
1462 | printk(" local_irq_disable();\n"); | ||
1463 | printk(" lock("); | ||
1464 | __print_lock_name(safe_class); | ||
1465 | printk(");\n"); | ||
1466 | printk(" lock("); | ||
1467 | __print_lock_name(middle_class); | ||
1468 | printk(");\n"); | ||
1469 | printk(" <Interrupt>\n"); | ||
1470 | printk(" lock("); | ||
1471 | __print_lock_name(safe_class); | ||
1472 | printk(");\n"); | ||
1473 | printk("\n *** DEADLOCK ***\n\n"); | ||
1474 | } | ||
1475 | |||
1476 | static int | ||
1477 | print_bad_irq_dependency(struct task_struct *curr, | ||
1478 | struct lock_list *prev_root, | ||
1479 | struct lock_list *next_root, | ||
1480 | struct lock_list *backwards_entry, | ||
1481 | struct lock_list *forwards_entry, | ||
1482 | struct held_lock *prev, | ||
1483 | struct held_lock *next, | ||
1484 | enum lock_usage_bit bit1, | ||
1485 | enum lock_usage_bit bit2, | ||
1486 | const char *irqclass) | ||
1487 | { | ||
1488 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||
1489 | return 0; | ||
1490 | |||
1491 | printk("\n"); | ||
1492 | printk("======================================================\n"); | ||
1493 | printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", | ||
1494 | irqclass, irqclass); | ||
1495 | print_kernel_ident(); | ||
1496 | printk("------------------------------------------------------\n"); | ||
1497 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", | ||
1498 | curr->comm, task_pid_nr(curr), | ||
1499 | curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, | ||
1500 | curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, | ||
1501 | curr->hardirqs_enabled, | ||
1502 | curr->softirqs_enabled); | ||
1503 | print_lock(next); | ||
1504 | |||
1505 | printk("\nand this task is already holding:\n"); | ||
1506 | print_lock(prev); | ||
1507 | printk("which would create a new lock dependency:\n"); | ||
1508 | print_lock_name(hlock_class(prev)); | ||
1509 | printk(" ->"); | ||
1510 | print_lock_name(hlock_class(next)); | ||
1511 | printk("\n"); | ||
1512 | |||
1513 | printk("\nbut this new dependency connects a %s-irq-safe lock:\n", | ||
1514 | irqclass); | ||
1515 | print_lock_name(backwards_entry->class); | ||
1516 | printk("\n... which became %s-irq-safe at:\n", irqclass); | ||
1517 | |||
1518 | print_stack_trace(backwards_entry->class->usage_traces + bit1, 1); | ||
1519 | |||
1520 | printk("\nto a %s-irq-unsafe lock:\n", irqclass); | ||
1521 | print_lock_name(forwards_entry->class); | ||
1522 | printk("\n... which became %s-irq-unsafe at:\n", irqclass); | ||
1523 | printk("..."); | ||
1524 | |||
1525 | print_stack_trace(forwards_entry->class->usage_traces + bit2, 1); | ||
1526 | |||
1527 | printk("\nother info that might help us debug this:\n\n"); | ||
1528 | print_irq_lock_scenario(backwards_entry, forwards_entry, | ||
1529 | hlock_class(prev), hlock_class(next)); | ||
1530 | |||
1531 | lockdep_print_held_locks(curr); | ||
1532 | |||
1533 | printk("\nthe dependencies between %s-irq-safe lock", irqclass); | ||
1534 | printk(" and the holding lock:\n"); | ||
1535 | if (!save_trace(&prev_root->trace)) | ||
1536 | return 0; | ||
1537 | print_shortest_lock_dependencies(backwards_entry, prev_root); | ||
1538 | |||
1539 | printk("\nthe dependencies between the lock to be acquired"); | ||
1540 | printk(" and %s-irq-unsafe lock:\n", irqclass); | ||
1541 | if (!save_trace(&next_root->trace)) | ||
1542 | return 0; | ||
1543 | print_shortest_lock_dependencies(forwards_entry, next_root); | ||
1544 | |||
1545 | printk("\nstack backtrace:\n"); | ||
1546 | dump_stack(); | ||
1547 | |||
1548 | return 0; | ||
1549 | } | ||
1550 | |||
1551 | static int | ||
1552 | check_usage(struct task_struct *curr, struct held_lock *prev, | ||
1553 | struct held_lock *next, enum lock_usage_bit bit_backwards, | ||
1554 | enum lock_usage_bit bit_forwards, const char *irqclass) | ||
1555 | { | ||
1556 | int ret; | ||
1557 | struct lock_list this, that; | ||
1558 | struct lock_list *uninitialized_var(target_entry); | ||
1559 | struct lock_list *uninitialized_var(target_entry1); | ||
1560 | |||
1561 | this.parent = NULL; | ||
1562 | |||
1563 | this.class = hlock_class(prev); | ||
1564 | ret = find_usage_backwards(&this, bit_backwards, &target_entry); | ||
1565 | if (ret < 0) | ||
1566 | return print_bfs_bug(ret); | ||
1567 | if (ret == 1) | ||
1568 | return ret; | ||
1569 | |||
1570 | that.parent = NULL; | ||
1571 | that.class = hlock_class(next); | ||
1572 | ret = find_usage_forwards(&that, bit_forwards, &target_entry1); | ||
1573 | if (ret < 0) | ||
1574 | return print_bfs_bug(ret); | ||
1575 | if (ret == 1) | ||
1576 | return ret; | ||
1577 | |||
1578 | return print_bad_irq_dependency(curr, &this, &that, | ||
1579 | target_entry, target_entry1, | ||
1580 | prev, next, | ||
1581 | bit_backwards, bit_forwards, irqclass); | ||
1582 | } | ||
1583 | |||
1584 | static const char *state_names[] = { | ||
1585 | #define LOCKDEP_STATE(__STATE) \ | ||
1586 | __stringify(__STATE), | ||
1587 | #include "lockdep_states.h" | ||
1588 | #undef LOCKDEP_STATE | ||
1589 | }; | ||
1590 | |||
1591 | static const char *state_rnames[] = { | ||
1592 | #define LOCKDEP_STATE(__STATE) \ | ||
1593 | __stringify(__STATE)"-READ", | ||
1594 | #include "lockdep_states.h" | ||
1595 | #undef LOCKDEP_STATE | ||
1596 | }; | ||
1597 | |||
1598 | static inline const char *state_name(enum lock_usage_bit bit) | ||
1599 | { | ||
1600 | return (bit & 1) ? state_rnames[bit >> 2] : state_names[bit >> 2]; | ||
1601 | } | ||
1602 | |||
1603 | static int exclusive_bit(int new_bit) | ||
1604 | { | ||
1605 | /* | ||
1606 | * USED_IN | ||
1607 | * USED_IN_READ | ||
1608 | * ENABLED | ||
1609 | * ENABLED_READ | ||
1610 | * | ||
1611 | * bit 0 - write/read | ||
1612 | * bit 1 - used_in/enabled | ||
1613 | * bit 2+ state | ||
1614 | */ | ||
1615 | |||
1616 | int state = new_bit & ~3; | ||
1617 | int dir = new_bit & 2; | ||
1618 | |||
1619 | /* | ||
1620 | * keep state, bit flip the direction and strip read. | ||
1621 | */ | ||
1622 | return state | (dir ^ 2); | ||
1623 | } | ||
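/*
 * A worked example of the encoding above, assuming HARDIRQ is the first
 * state generated from lockdep_states.h (so its four usage bits are 0..3):
 *
 *   LOCK_USED_IN_HARDIRQ      = 0 -> exclusive_bit() == 2 == LOCK_ENABLED_HARDIRQ
 *   LOCK_USED_IN_HARDIRQ_READ = 1 -> exclusive_bit() == 2 == LOCK_ENABLED_HARDIRQ
 *   LOCK_ENABLED_HARDIRQ      = 2 -> exclusive_bit() == 0 == LOCK_USED_IN_HARDIRQ
 *   LOCK_ENABLED_HARDIRQ_READ = 3 -> exclusive_bit() == 0 == LOCK_USED_IN_HARDIRQ
 *
 * and state_name() picks "HARDIRQ" vs "HARDIRQ-READ" from bit 0, e.g.
 * state_name(1) == "HARDIRQ-READ" via state_rnames[1 >> 2].
 */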
1624 | |||
1625 | static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, | ||
1626 | struct held_lock *next, enum lock_usage_bit bit) | ||
1627 | { | ||
1628 | /* | ||
1629 | * Prove that the new dependency does not connect a hardirq-safe | ||
1630 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
1631 | * the backwards-subgraph starting at <prev>, and the | ||
1632 | * forwards-subgraph starting at <next>: | ||
1633 | */ | ||
1634 | if (!check_usage(curr, prev, next, bit, | ||
1635 | exclusive_bit(bit), state_name(bit))) | ||
1636 | return 0; | ||
1637 | |||
1638 | bit++; /* _READ */ | ||
1639 | |||
1640 | /* | ||
1641 | * Prove that the new dependency does not connect a hardirq-safe-read | ||
1642 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
1643 | * the backwards-subgraph starting at <prev>, and the | ||
1644 | * forwards-subgraph starting at <next>: | ||
1645 | */ | ||
1646 | if (!check_usage(curr, prev, next, bit, | ||
1647 | exclusive_bit(bit), state_name(bit))) | ||
1648 | return 0; | ||
1649 | |||
1650 | return 1; | ||
1651 | } | ||
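/*
 * A rough sketch of the inversion the two checks above catch, with
 * invented lock names:
 *
 *   in a hardirq handler:   lock(irqsafe);              // irqsafe becomes hardirq-safe
 *   in process context:     lock(irqsafe); lock(plain); // new irqsafe -> plain edge
 *   elsewhere, irqs on:     lock(plain);
 *                             <hardirq>
 *                               lock(irqsafe);          // spins against the holder of
 *                                                       // irqsafe, which is waiting
 *                                                       // for plain: deadlock
 *
 * find_usage_backwards() from <prev> locates the hardirq-safe ancestor and
 * find_usage_forwards() from <next> locates the hardirq-unsafe descendant;
 * if both exist, print_bad_irq_dependency() reports the pair.
 */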
1652 | |||
1653 | static int | ||
1654 | check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, | ||
1655 | struct held_lock *next) | ||
1656 | { | ||
1657 | #define LOCKDEP_STATE(__STATE) \ | ||
1658 | if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \ | ||
1659 | return 0; | ||
1660 | #include "lockdep_states.h" | ||
1661 | #undef LOCKDEP_STATE | ||
1662 | |||
1663 | return 1; | ||
1664 | } | ||
1665 | |||
1666 | static void inc_chains(void) | ||
1667 | { | ||
1668 | if (current->hardirq_context) | ||
1669 | nr_hardirq_chains++; | ||
1670 | else { | ||
1671 | if (current->softirq_context) | ||
1672 | nr_softirq_chains++; | ||
1673 | else | ||
1674 | nr_process_chains++; | ||
1675 | } | ||
1676 | } | ||
1677 | |||
1678 | #else | ||
1679 | |||
1680 | static inline int | ||
1681 | check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, | ||
1682 | struct held_lock *next) | ||
1683 | { | ||
1684 | return 1; | ||
1685 | } | ||
1686 | |||
1687 | static inline void inc_chains(void) | ||
1688 | { | ||
1689 | nr_process_chains++; | ||
1690 | } | ||
1691 | |||
1692 | #endif | ||
1693 | |||
1694 | static void | ||
1695 | print_deadlock_scenario(struct held_lock *nxt, | ||
1696 | struct held_lock *prv) | ||
1697 | { | ||
1698 | struct lock_class *next = hlock_class(nxt); | ||
1699 | struct lock_class *prev = hlock_class(prv); | ||
1700 | |||
1701 | printk(" Possible unsafe locking scenario:\n\n"); | ||
1702 | printk(" CPU0\n"); | ||
1703 | printk(" ----\n"); | ||
1704 | printk(" lock("); | ||
1705 | __print_lock_name(prev); | ||
1706 | printk(");\n"); | ||
1707 | printk(" lock("); | ||
1708 | __print_lock_name(next); | ||
1709 | printk(");\n"); | ||
1710 | printk("\n *** DEADLOCK ***\n\n"); | ||
1711 | printk(" May be due to missing lock nesting notation\n\n"); | ||
1712 | } | ||
1713 | |||
1714 | static int | ||
1715 | print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, | ||
1716 | struct held_lock *next) | ||
1717 | { | ||
1718 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||
1719 | return 0; | ||
1720 | |||
1721 | printk("\n"); | ||
1722 | printk("=============================================\n"); | ||
1723 | printk("[ INFO: possible recursive locking detected ]\n"); | ||
1724 | print_kernel_ident(); | ||
1725 | printk("---------------------------------------------\n"); | ||
1726 | printk("%s/%d is trying to acquire lock:\n", | ||
1727 | curr->comm, task_pid_nr(curr)); | ||
1728 | print_lock(next); | ||
1729 | printk("\nbut task is already holding lock:\n"); | ||
1730 | print_lock(prev); | ||
1731 | |||
1732 | printk("\nother info that might help us debug this:\n"); | ||
1733 | print_deadlock_scenario(next, prev); | ||
1734 | lockdep_print_held_locks(curr); | ||
1735 | |||
1736 | printk("\nstack backtrace:\n"); | ||
1737 | dump_stack(); | ||
1738 | |||
1739 | return 0; | ||
1740 | } | ||
1741 | |||
1742 | /* | ||
1743 | * Check whether we are holding such a class already. | ||
1744 | * | ||
1745 | * (Note that this has to be done separately, because the graph cannot | ||
1746 | * detect such classes of deadlocks.) | ||
1747 | * | ||
1748 | * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read | ||
1749 | */ | ||
1750 | static int | ||
1751 | check_deadlock(struct task_struct *curr, struct held_lock *next, | ||
1752 | struct lockdep_map *next_instance, int read) | ||
1753 | { | ||
1754 | struct held_lock *prev; | ||
1755 | struct held_lock *nest = NULL; | ||
1756 | int i; | ||
1757 | |||
1758 | for (i = 0; i < curr->lockdep_depth; i++) { | ||
1759 | prev = curr->held_locks + i; | ||
1760 | |||
1761 | if (prev->instance == next->nest_lock) | ||
1762 | nest = prev; | ||
1763 | |||
1764 | if (hlock_class(prev) != hlock_class(next)) | ||
1765 | continue; | ||
1766 | |||
1767 | /* | ||
1768 | * Allow read-after-read recursion of the same | ||
1769 | * lock class (i.e. read_lock(lock)+read_lock(lock)): | ||
1770 | */ | ||
1771 | if ((read == 2) && prev->read) | ||
1772 | return 2; | ||
1773 | |||
1774 | /* | ||
1775 | * We're holding the nest_lock, which serializes this lock's | ||
1776 | * nesting behaviour. | ||
1777 | */ | ||
1778 | if (nest) | ||
1779 | return 2; | ||
1780 | |||
1781 | return print_deadlock_bug(curr, prev, next); | ||
1782 | } | ||
1783 | return 1; | ||
1784 | } | ||
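/*
 * Illustrative cases for the rules above (lock names invented):
 *
 *   read_lock(&map_lock);
 *   read_lock(&map_lock);        // ok: recursive read (read == 2) of one class
 *
 *   spin_lock(&a->lock);
 *   spin_lock(&b->lock);         // a and b share a class and no nest_lock is
 *                                // given: print_deadlock_bug() reports
 *                                // "possible recursive locking detected"
 *
 *   mutex_lock(&parent->mtx);
 *   mutex_lock_nest_lock(&child->mtx, &parent->mtx);  // nest_lock held: allowed
 */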
1785 | |||
1786 | /* | ||
1787 | * There was a chain-cache miss, and we are about to add a new dependency | ||
1788 | * to a previous lock. We recursively validate the following rules: | ||
1789 | * | ||
1790 | * - would the adding of the <prev> -> <next> dependency create a | ||
1791 | * circular dependency in the graph? [== circular deadlock] | ||
1792 | * | ||
1793 | * - does the new prev->next dependency connect any hardirq-safe lock | ||
1794 | * (in the full backwards-subgraph starting at <prev>) with any | ||
1795 | * hardirq-unsafe lock (in the full forwards-subgraph starting at | ||
1796 | * <next>)? [== illegal lock inversion with hardirq contexts] | ||
1797 | * | ||
1798 | * - does the new prev->next dependency connect any softirq-safe lock | ||
1799 | * (in the full backwards-subgraph starting at <prev>) with any | ||
1800 | * softirq-unsafe lock (in the full forwards-subgraph starting at | ||
1801 | * <next>)? [== illegal lock inversion with softirq contexts] | ||
1802 | * | ||
1803 | * any of these scenarios could lead to a deadlock. | ||
1804 | * | ||
1805 | * Then if all the validations pass, we add the forwards and backwards | ||
1806 | * dependency. | ||
1807 | */ | ||
1808 | static int | ||
1809 | check_prev_add(struct task_struct *curr, struct held_lock *prev, | ||
1810 | struct held_lock *next, int distance, int trylock_loop) | ||
1811 | { | ||
1812 | struct lock_list *entry; | ||
1813 | int ret; | ||
1814 | struct lock_list this; | ||
1815 | struct lock_list *uninitialized_var(target_entry); | ||
1816 | /* | ||
1817 | * Static variable, serialized by the graph_lock(). | ||
1818 | * | ||
1819 | * We use this static variable to save the stack trace in case | ||
1820 | * we call into this function multiple times due to encountering | ||
1821 | * trylocks in the held lock stack. | ||
1822 | */ | ||
1823 | static struct stack_trace trace; | ||
1824 | |||
1825 | /* | ||
1826 | * Prove that the new <prev> -> <next> dependency would not | ||
1827 | * create a circular dependency in the graph. (We do this by | ||
1828 | * forward-recursing into the graph starting at <next>, and | ||
1829 | * checking whether we can reach <prev>.) | ||
1830 | * | ||
1831 | * We are using global variables to control the recursion, to | ||
1832 | * keep the stackframe size of the recursive functions low: | ||
1833 | */ | ||
1834 | this.class = hlock_class(next); | ||
1835 | this.parent = NULL; | ||
1836 | ret = check_noncircular(&this, hlock_class(prev), &target_entry); | ||
1837 | if (unlikely(!ret)) | ||
1838 | return print_circular_bug(&this, target_entry, next, prev); | ||
1839 | else if (unlikely(ret < 0)) | ||
1840 | return print_bfs_bug(ret); | ||
1841 | |||
1842 | if (!check_prev_add_irq(curr, prev, next)) | ||
1843 | return 0; | ||
1844 | |||
1845 | /* | ||
1846 | * For recursive read-locks we do all the dependency checks, | ||
1847 | * but we dont store read-triggered dependencies (only | ||
1848 | * write-triggered dependencies). This ensures that only the | ||
1849 | * write-side dependencies matter, and that if for example a | ||
1850 | * write-lock never takes any other locks, then the reads are | ||
1851 | * equivalent to a NOP. | ||
1852 | */ | ||
1853 | if (next->read == 2 || prev->read == 2) | ||
1854 | return 1; | ||
1855 | /* | ||
1856 | * Is the <prev> -> <next> dependency already present? | ||
1857 | * | ||
1858 | * (this may occur even though this is a new chain: consider | ||
1859 | * e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3 | ||
1860 | * chains - the second one will be new, but L1 already has | ||
1861 | * L2 added to its dependency list, due to the first chain.) | ||
1862 | */ | ||
1863 | list_for_each_entry(entry, &hlock_class(prev)->locks_after, entry) { | ||
1864 | if (entry->class == hlock_class(next)) { | ||
1865 | if (distance == 1) | ||
1866 | entry->distance = 1; | ||
1867 | return 2; | ||
1868 | } | ||
1869 | } | ||
1870 | |||
1871 | if (!trylock_loop && !save_trace(&trace)) | ||
1872 | return 0; | ||
1873 | |||
1874 | /* | ||
1875 | * Ok, all validations passed, add the new lock | ||
1876 | * to the previous lock's dependency list: | ||
1877 | */ | ||
1878 | ret = add_lock_to_list(hlock_class(prev), hlock_class(next), | ||
1879 | &hlock_class(prev)->locks_after, | ||
1880 | next->acquire_ip, distance, &trace); | ||
1881 | |||
1882 | if (!ret) | ||
1883 | return 0; | ||
1884 | |||
1885 | ret = add_lock_to_list(hlock_class(next), hlock_class(prev), | ||
1886 | &hlock_class(next)->locks_before, | ||
1887 | next->acquire_ip, distance, &trace); | ||
1888 | if (!ret) | ||
1889 | return 0; | ||
1890 | |||
1891 | /* | ||
1892 | * Debugging printouts: | ||
1893 | */ | ||
1894 | if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { | ||
1895 | graph_unlock(); | ||
1896 | printk("\n new dependency: "); | ||
1897 | print_lock_name(hlock_class(prev)); | ||
1898 | printk(" => "); | ||
1899 | print_lock_name(hlock_class(next)); | ||
1900 | printk("\n"); | ||
1901 | dump_stack(); | ||
1902 | return graph_lock(); | ||
1903 | } | ||
1904 | return 1; | ||
1905 | } | ||
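/*
 * Sketch of what the check_noncircular() step above rejects, with two
 * invented locks A and B:
 *
 *   thread 1 (earlier):  lock(A); lock(B);   // records an A -> B dependency
 *   thread 2 (now):      lock(B); lock(A);   // check_prev_add(prev = B, next = A)
 *
 * The forward BFS from hlock_class(next) == A reaches hlock_class(prev) == B
 * over the existing A -> B edge, so check_noncircular() reports a match and
 * print_circular_bug() flags the AB-BA deadlock instead of adding B -> A.
 */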
1906 | |||
1907 | /* | ||
1908 | * Add the dependency to all directly-previous locks that are 'relevant'. | ||
1909 | * The ones that are relevant are (in increasing distance from curr): | ||
1910 | * all consecutive trylock entries and the final non-trylock entry - or | ||
1911 | * the end of this context's lock-chain - whichever comes first. | ||
1912 | */ | ||
1913 | static int | ||
1914 | check_prevs_add(struct task_struct *curr, struct held_lock *next) | ||
1915 | { | ||
1916 | int depth = curr->lockdep_depth; | ||
1917 | int trylock_loop = 0; | ||
1918 | struct held_lock *hlock; | ||
1919 | |||
1920 | /* | ||
1921 | * Debugging checks. | ||
1922 | * | ||
1923 | * Depth must not be zero for a non-head lock: | ||
1924 | */ | ||
1925 | if (!depth) | ||
1926 | goto out_bug; | ||
1927 | /* | ||
1928 | * At least two relevant locks must exist for this | ||
1929 | * to be a head: | ||
1930 | */ | ||
1931 | if (curr->held_locks[depth].irq_context != | ||
1932 | curr->held_locks[depth-1].irq_context) | ||
1933 | goto out_bug; | ||
1934 | |||
1935 | for (;;) { | ||
1936 | int distance = curr->lockdep_depth - depth + 1; | ||
1937 | hlock = curr->held_locks + depth-1; | ||
1938 | /* | ||
1939 | * Only non-recursive-read entries get new dependencies | ||
1940 | * added: | ||
1941 | */ | ||
1942 | if (hlock->read != 2) { | ||
1943 | if (!check_prev_add(curr, hlock, next, | ||
1944 | distance, trylock_loop)) | ||
1945 | return 0; | ||
1946 | /* | ||
1947 | * Stop after the first non-trylock entry, | ||
1948 | * as non-trylock entries have added their | ||
1949 | * own direct dependencies already, so this | ||
1950 | * lock is connected to them indirectly: | ||
1951 | */ | ||
1952 | if (!hlock->trylock) | ||
1953 | break; | ||
1954 | } | ||
1955 | depth--; | ||
1956 | /* | ||
1957 | * End of lock-stack? | ||
1958 | */ | ||
1959 | if (!depth) | ||
1960 | break; | ||
1961 | /* | ||
1962 | * Stop the search if we cross into another context: | ||
1963 | */ | ||
1964 | if (curr->held_locks[depth].irq_context != | ||
1965 | curr->held_locks[depth-1].irq_context) | ||
1966 | break; | ||
1967 | trylock_loop = 1; | ||
1968 | } | ||
1969 | return 1; | ||
1970 | out_bug: | ||
1971 | if (!debug_locks_off_graph_unlock()) | ||
1972 | return 0; | ||
1973 | |||
1974 | /* | ||
1975 | * Clearly we all shouldn't be here, but since we made it we | ||
1976 | * can reliably say we messed up our state. See the above two | ||
1977 | * gotos for reasons why we could possibly end up here. | ||
1978 | */ | ||
1979 | WARN_ON(1); | ||
1980 | |||
1981 | return 0; | ||
1982 | } | ||
1983 | |||
1984 | unsigned long nr_lock_chains; | ||
1985 | struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; | ||
1986 | int nr_chain_hlocks; | ||
1987 | static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS]; | ||
1988 | |||
1989 | struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i) | ||
1990 | { | ||
1991 | return lock_classes + chain_hlocks[chain->base + i]; | ||
1992 | } | ||
1993 | |||
1994 | /* | ||
1995 | * Look up a dependency chain. If the key is not present yet then | ||
1996 | * add it and return 1 - in this case the new dependency chain is | ||
1997 | * validated. If the key is already hashed, return 0. | ||
1998 | * (On return with 1 graph_lock is held.) | ||
1999 | */ | ||
2000 | static inline int lookup_chain_cache(struct task_struct *curr, | ||
2001 | struct held_lock *hlock, | ||
2002 | u64 chain_key) | ||
2003 | { | ||
2004 | struct lock_class *class = hlock_class(hlock); | ||
2005 | struct list_head *hash_head = chainhashentry(chain_key); | ||
2006 | struct lock_chain *chain; | ||
2007 | struct held_lock *hlock_curr; | ||
2008 | int i, j; | ||
2009 | |||
2010 | /* | ||
2011 | * We might need to take the graph lock, ensure we've got IRQs | ||
2012 | * disabled to make this an IRQ-safe lock.. for recursion reasons | ||
2013 | * lockdep won't complain about its own locking errors. | ||
2014 | */ | ||
2015 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
2016 | return 0; | ||
2017 | /* | ||
2018 | * We can walk it lock-free, because entries only get added | ||
2019 | * to the hash: | ||
2020 | */ | ||
2021 | list_for_each_entry(chain, hash_head, entry) { | ||
2022 | if (chain->chain_key == chain_key) { | ||
2023 | cache_hit: | ||
2024 | debug_atomic_inc(chain_lookup_hits); | ||
2025 | if (very_verbose(class)) | ||
2026 | printk("\nhash chain already cached, key: " | ||
2027 | "%016Lx tail class: [%p] %s\n", | ||
2028 | (unsigned long long)chain_key, | ||
2029 | class->key, class->name); | ||
2030 | return 0; | ||
2031 | } | ||
2032 | } | ||
2033 | if (very_verbose(class)) | ||
2034 | printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", | ||
2035 | (unsigned long long)chain_key, class->key, class->name); | ||
2036 | /* | ||
2037 | * Allocate a new chain entry from the static array, and add | ||
2038 | * it to the hash: | ||
2039 | */ | ||
2040 | if (!graph_lock()) | ||
2041 | return 0; | ||
2042 | /* | ||
2043 | * We have to walk the chain again locked - to avoid duplicates: | ||
2044 | */ | ||
2045 | list_for_each_entry(chain, hash_head, entry) { | ||
2046 | if (chain->chain_key == chain_key) { | ||
2047 | graph_unlock(); | ||
2048 | goto cache_hit; | ||
2049 | } | ||
2050 | } | ||
2051 | if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { | ||
2052 | if (!debug_locks_off_graph_unlock()) | ||
2053 | return 0; | ||
2054 | |||
2055 | print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!"); | ||
2056 | dump_stack(); | ||
2057 | return 0; | ||
2058 | } | ||
2059 | chain = lock_chains + nr_lock_chains++; | ||
2060 | chain->chain_key = chain_key; | ||
2061 | chain->irq_context = hlock->irq_context; | ||
2062 | /* Find the first held_lock of current chain */ | ||
2063 | for (i = curr->lockdep_depth - 1; i >= 0; i--) { | ||
2064 | hlock_curr = curr->held_locks + i; | ||
2065 | if (hlock_curr->irq_context != hlock->irq_context) | ||
2066 | break; | ||
2067 | } | ||
2068 | i++; | ||
2069 | chain->depth = curr->lockdep_depth + 1 - i; | ||
2070 | if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { | ||
2071 | chain->base = nr_chain_hlocks; | ||
2072 | nr_chain_hlocks += chain->depth; | ||
2073 | for (j = 0; j < chain->depth - 1; j++, i++) { | ||
2074 | int lock_id = curr->held_locks[i].class_idx - 1; | ||
2075 | chain_hlocks[chain->base + j] = lock_id; | ||
2076 | } | ||
2077 | chain_hlocks[chain->base + j] = class - lock_classes; | ||
2078 | } | ||
2079 | list_add_tail_rcu(&chain->entry, hash_head); | ||
2080 | debug_atomic_inc(chain_lookup_misses); | ||
2081 | inc_chains(); | ||
2082 | |||
2083 | return 1; | ||
2084 | } | ||
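/*
 * In practice this means that for a context which repeatedly takes the
 * same sequence of classes, say:
 *
 *   lock(X); lock(Y); lock(Z);
 *
 * only the first occurrence misses: it allocates a lock_chain, records the
 * class indices in chain_hlocks[] and returns 1, so validate_chain() runs
 * the full dependency checks with graph_lock held.  Later occurrences hash
 * to the same chain_key, take the cache_hit: path and return 0, skipping
 * the expensive checks.
 */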
2085 | |||
2086 | static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, | ||
2087 | struct held_lock *hlock, int chain_head, u64 chain_key) | ||
2088 | { | ||
2089 | /* | ||
2090 | * Trylock needs to maintain the stack of held locks, but it | ||
2091 | * does not add new dependencies, because trylock can be done | ||
2092 | * in any order. | ||
2093 | * | ||
2094 | * We look up the chain_key and do the O(N^2) check and update of | ||
2095 | * the dependencies only if this is a new dependency chain. | ||
2096 | * (If lookup_chain_cache() returns with 1 it acquires | ||
2097 | * graph_lock for us) | ||
2098 | */ | ||
2099 | if (!hlock->trylock && (hlock->check == 2) && | ||
2100 | lookup_chain_cache(curr, hlock, chain_key)) { | ||
2101 | /* | ||
2102 | * Check whether last held lock: | ||
2103 | * | ||
2104 | * - is irq-safe, if this lock is irq-unsafe | ||
2105 | * - is softirq-safe, if this lock is hardirq-unsafe | ||
2106 | * | ||
2107 | * And check whether the new lock's dependency graph | ||
2108 | * could lead back to the previous lock. | ||
2109 | * | ||
2110 | * Any of these scenarios could lead to a deadlock. If all | ||
2111 | * validations pass, we add the new dependencies. | ||
2112 | */ | ||
2113 | int ret = check_deadlock(curr, hlock, lock, hlock->read); | ||
2114 | |||
2115 | if (!ret) | ||
2116 | return 0; | ||
2117 | /* | ||
2118 | * Mark recursive read, as we jump over it when | ||
2119 | * building dependencies (just like we jump over | ||
2120 | * trylock entries): | ||
2121 | */ | ||
2122 | if (ret == 2) | ||
2123 | hlock->read = 2; | ||
2124 | /* | ||
2125 | * Add dependency only if this lock is not the head | ||
2126 | * of the chain, and if it's not a secondary read-lock: | ||
2127 | */ | ||
2128 | if (!chain_head && ret != 2) | ||
2129 | if (!check_prevs_add(curr, hlock)) | ||
2130 | return 0; | ||
2131 | graph_unlock(); | ||
2132 | } else | ||
2133 | /* after lookup_chain_cache(): */ | ||
2134 | if (unlikely(!debug_locks)) | ||
2135 | return 0; | ||
2136 | |||
2137 | return 1; | ||
2138 | } | ||
2139 | #else | ||
2140 | static inline int validate_chain(struct task_struct *curr, | ||
2141 | struct lockdep_map *lock, struct held_lock *hlock, | ||
2142 | int chain_head, u64 chain_key) | ||
2143 | { | ||
2144 | return 1; | ||
2145 | } | ||
2146 | #endif | ||
2147 | |||
2148 | /* | ||
2149 | * We are building curr_chain_key incrementally, so double-check | ||
2150 | * it from scratch, to make sure that it's done correctly: | ||
2151 | */ | ||
2152 | static void check_chain_key(struct task_struct *curr) | ||
2153 | { | ||
2154 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
2155 | struct held_lock *hlock, *prev_hlock = NULL; | ||
2156 | unsigned int i, id; | ||
2157 | u64 chain_key = 0; | ||
2158 | |||
2159 | for (i = 0; i < curr->lockdep_depth; i++) { | ||
2160 | hlock = curr->held_locks + i; | ||
2161 | if (chain_key != hlock->prev_chain_key) { | ||
2162 | debug_locks_off(); | ||
2163 | /* | ||
2164 | * We got mighty confused, our chain keys don't match | ||
2165 | * with what we expect, did someone trample on our task state? | ||
2166 | */ | ||
2167 | WARN(1, "hm#1, depth: %u [%u], %016Lx != %016Lx\n", | ||
2168 | curr->lockdep_depth, i, | ||
2169 | (unsigned long long)chain_key, | ||
2170 | (unsigned long long)hlock->prev_chain_key); | ||
2171 | return; | ||
2172 | } | ||
2173 | id = hlock->class_idx - 1; | ||
2174 | /* | ||
2175 | * Whoops ran out of static storage again? | ||
2176 | */ | ||
2177 | if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) | ||
2178 | return; | ||
2179 | |||
2180 | if (prev_hlock && (prev_hlock->irq_context != | ||
2181 | hlock->irq_context)) | ||
2182 | chain_key = 0; | ||
2183 | chain_key = iterate_chain_key(chain_key, id); | ||
2184 | prev_hlock = hlock; | ||
2185 | } | ||
2186 | if (chain_key != curr->curr_chain_key) { | ||
2187 | debug_locks_off(); | ||
2188 | /* | ||
2189 | * More smoking hash instead of calculating it, damn see these | ||
2190 | * numbers float.. I bet that a pink elephant stepped on my memory. | ||
2191 | */ | ||
2192 | WARN(1, "hm#2, depth: %u [%u], %016Lx != %016Lx\n", | ||
2193 | curr->lockdep_depth, i, | ||
2194 | (unsigned long long)chain_key, | ||
2195 | (unsigned long long)curr->curr_chain_key); | ||
2196 | } | ||
2197 | #endif | ||
2198 | } | ||
2199 | |||
2200 | static void | ||
2201 | print_usage_bug_scenario(struct held_lock *lock) | ||
2202 | { | ||
2203 | struct lock_class *class = hlock_class(lock); | ||
2204 | |||
2205 | printk(" Possible unsafe locking scenario:\n\n"); | ||
2206 | printk(" CPU0\n"); | ||
2207 | printk(" ----\n"); | ||
2208 | printk(" lock("); | ||
2209 | __print_lock_name(class); | ||
2210 | printk(");\n"); | ||
2211 | printk(" <Interrupt>\n"); | ||
2212 | printk(" lock("); | ||
2213 | __print_lock_name(class); | ||
2214 | printk(");\n"); | ||
2215 | printk("\n *** DEADLOCK ***\n\n"); | ||
2216 | } | ||
2217 | |||
2218 | static int | ||
2219 | print_usage_bug(struct task_struct *curr, struct held_lock *this, | ||
2220 | enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) | ||
2221 | { | ||
2222 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||
2223 | return 0; | ||
2224 | |||
2225 | printk("\n"); | ||
2226 | printk("=================================\n"); | ||
2227 | printk("[ INFO: inconsistent lock state ]\n"); | ||
2228 | print_kernel_ident(); | ||
2229 | printk("---------------------------------\n"); | ||
2230 | |||
2231 | printk("inconsistent {%s} -> {%s} usage.\n", | ||
2232 | usage_str[prev_bit], usage_str[new_bit]); | ||
2233 | |||
2234 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", | ||
2235 | curr->comm, task_pid_nr(curr), | ||
2236 | trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, | ||
2237 | trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, | ||
2238 | trace_hardirqs_enabled(curr), | ||
2239 | trace_softirqs_enabled(curr)); | ||
2240 | print_lock(this); | ||
2241 | |||
2242 | printk("{%s} state was registered at:\n", usage_str[prev_bit]); | ||
2243 | print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1); | ||
2244 | |||
2245 | print_irqtrace_events(curr); | ||
2246 | printk("\nother info that might help us debug this:\n"); | ||
2247 | print_usage_bug_scenario(this); | ||
2248 | |||
2249 | lockdep_print_held_locks(curr); | ||
2250 | |||
2251 | printk("\nstack backtrace:\n"); | ||
2252 | dump_stack(); | ||
2253 | |||
2254 | return 0; | ||
2255 | } | ||
2256 | |||
2257 | /* | ||
2258 | * Print out an error if an invalid bit is set: | ||
2259 | */ | ||
2260 | static inline int | ||
2261 | valid_state(struct task_struct *curr, struct held_lock *this, | ||
2262 | enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) | ||
2263 | { | ||
2264 | if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) | ||
2265 | return print_usage_bug(curr, this, bad_bit, new_bit); | ||
2266 | return 1; | ||
2267 | } | ||
2268 | |||
2269 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
2270 | enum lock_usage_bit new_bit); | ||
2271 | |||
2272 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) | ||
2273 | |||
2274 | /* | ||
2275 | * print irq inversion bug: | ||
2276 | */ | ||
2277 | static int | ||
2278 | print_irq_inversion_bug(struct task_struct *curr, | ||
2279 | struct lock_list *root, struct lock_list *other, | ||
2280 | struct held_lock *this, int forwards, | ||
2281 | const char *irqclass) | ||
2282 | { | ||
2283 | struct lock_list *entry = other; | ||
2284 | struct lock_list *middle = NULL; | ||
2285 | int depth; | ||
2286 | |||
2287 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||
2288 | return 0; | ||
2289 | |||
2290 | printk("\n"); | ||
2291 | printk("=========================================================\n"); | ||
2292 | printk("[ INFO: possible irq lock inversion dependency detected ]\n"); | ||
2293 | print_kernel_ident(); | ||
2294 | printk("---------------------------------------------------------\n"); | ||
2295 | printk("%s/%d just changed the state of lock:\n", | ||
2296 | curr->comm, task_pid_nr(curr)); | ||
2297 | print_lock(this); | ||
2298 | if (forwards) | ||
2299 | printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass); | ||
2300 | else | ||
2301 | printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass); | ||
2302 | print_lock_name(other->class); | ||
2303 | printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); | ||
2304 | |||
2305 | printk("\nother info that might help us debug this:\n"); | ||
2306 | |||
2307 | /* Find a middle lock (if one exists) */ | ||
2308 | depth = get_lock_depth(other); | ||
2309 | do { | ||
2310 | if (depth == 0 && (entry != root)) { | ||
2311 | printk("lockdep:%s bad path found in chain graph\n", __func__); | ||
2312 | break; | ||
2313 | } | ||
2314 | middle = entry; | ||
2315 | entry = get_lock_parent(entry); | ||
2316 | depth--; | ||
2317 | } while (entry && entry != root && (depth >= 0)); | ||
2318 | if (forwards) | ||
2319 | print_irq_lock_scenario(root, other, | ||
2320 | middle ? middle->class : root->class, other->class); | ||
2321 | else | ||
2322 | print_irq_lock_scenario(other, root, | ||
2323 | middle ? middle->class : other->class, root->class); | ||
2324 | |||
2325 | lockdep_print_held_locks(curr); | ||
2326 | |||
2327 | printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); | ||
2328 | if (!save_trace(&root->trace)) | ||
2329 | return 0; | ||
2330 | print_shortest_lock_dependencies(other, root); | ||
2331 | |||
2332 | printk("\nstack backtrace:\n"); | ||
2333 | dump_stack(); | ||
2334 | |||
2335 | return 0; | ||
2336 | } | ||
2337 | |||
2338 | /* | ||
2339 | * Prove that in the forwards-direction subgraph starting at <this> | ||
2340 | * there is no lock matching <mask>: | ||
2341 | */ | ||
2342 | static int | ||
2343 | check_usage_forwards(struct task_struct *curr, struct held_lock *this, | ||
2344 | enum lock_usage_bit bit, const char *irqclass) | ||
2345 | { | ||
2346 | int ret; | ||
2347 | struct lock_list root; | ||
2348 | struct lock_list *uninitialized_var(target_entry); | ||
2349 | |||
2350 | root.parent = NULL; | ||
2351 | root.class = hlock_class(this); | ||
2352 | ret = find_usage_forwards(&root, bit, &target_entry); | ||
2353 | if (ret < 0) | ||
2354 | return print_bfs_bug(ret); | ||
2355 | if (ret == 1) | ||
2356 | return ret; | ||
2357 | |||
2358 | return print_irq_inversion_bug(curr, &root, target_entry, | ||
2359 | this, 1, irqclass); | ||
2360 | } | ||
2361 | |||
2362 | /* | ||
2363 | * Prove that in the backwards-direction subgraph starting at <this> | ||
2364 | * there is no lock matching <mask>: | ||
2365 | */ | ||
2366 | static int | ||
2367 | check_usage_backwards(struct task_struct *curr, struct held_lock *this, | ||
2368 | enum lock_usage_bit bit, const char *irqclass) | ||
2369 | { | ||
2370 | int ret; | ||
2371 | struct lock_list root; | ||
2372 | struct lock_list *uninitialized_var(target_entry); | ||
2373 | |||
2374 | root.parent = NULL; | ||
2375 | root.class = hlock_class(this); | ||
2376 | ret = find_usage_backwards(&root, bit, &target_entry); | ||
2377 | if (ret < 0) | ||
2378 | return print_bfs_bug(ret); | ||
2379 | if (ret == 1) | ||
2380 | return ret; | ||
2381 | |||
2382 | return print_irq_inversion_bug(curr, &root, target_entry, | ||
2383 | this, 0, irqclass); | ||
2384 | } | ||
2385 | |||
2386 | void print_irqtrace_events(struct task_struct *curr) | ||
2387 | { | ||
2388 | printk("irq event stamp: %u\n", curr->irq_events); | ||
2389 | printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event); | ||
2390 | print_ip_sym(curr->hardirq_enable_ip); | ||
2391 | printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event); | ||
2392 | print_ip_sym(curr->hardirq_disable_ip); | ||
2393 | printk("softirqs last enabled at (%u): ", curr->softirq_enable_event); | ||
2394 | print_ip_sym(curr->softirq_enable_ip); | ||
2395 | printk("softirqs last disabled at (%u): ", curr->softirq_disable_event); | ||
2396 | print_ip_sym(curr->softirq_disable_ip); | ||
2397 | } | ||
2398 | |||
2399 | static int HARDIRQ_verbose(struct lock_class *class) | ||
2400 | { | ||
2401 | #if HARDIRQ_VERBOSE | ||
2402 | return class_filter(class); | ||
2403 | #endif | ||
2404 | return 0; | ||
2405 | } | ||
2406 | |||
2407 | static int SOFTIRQ_verbose(struct lock_class *class) | ||
2408 | { | ||
2409 | #if SOFTIRQ_VERBOSE | ||
2410 | return class_filter(class); | ||
2411 | #endif | ||
2412 | return 0; | ||
2413 | } | ||
2414 | |||
2415 | static int RECLAIM_FS_verbose(struct lock_class *class) | ||
2416 | { | ||
2417 | #if RECLAIM_VERBOSE | ||
2418 | return class_filter(class); | ||
2419 | #endif | ||
2420 | return 0; | ||
2421 | } | ||
2422 | |||
2423 | #define STRICT_READ_CHECKS 1 | ||
2424 | |||
2425 | static int (*state_verbose_f[])(struct lock_class *class) = { | ||
2426 | #define LOCKDEP_STATE(__STATE) \ | ||
2427 | __STATE##_verbose, | ||
2428 | #include "lockdep_states.h" | ||
2429 | #undef LOCKDEP_STATE | ||
2430 | }; | ||
2431 | |||
2432 | static inline int state_verbose(enum lock_usage_bit bit, | ||
2433 | struct lock_class *class) | ||
2434 | { | ||
2435 | return state_verbose_f[bit >> 2](class); | ||
2436 | } | ||
2437 | |||
2438 | typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, | ||
2439 | enum lock_usage_bit bit, const char *name); | ||
2440 | |||
2441 | static int | ||
2442 | mark_lock_irq(struct task_struct *curr, struct held_lock *this, | ||
2443 | enum lock_usage_bit new_bit) | ||
2444 | { | ||
2445 | int excl_bit = exclusive_bit(new_bit); | ||
2446 | int read = new_bit & 1; | ||
2447 | int dir = new_bit & 2; | ||
2448 | |||
2449 | /* | ||
2450 | * mark USED_IN has to look forwards -- to ensure no dependency | ||
2451 | * has ENABLED state, which would allow recursion deadlocks. | ||
2452 | * | ||
2453 | * mark ENABLED has to look backwards -- to ensure no dependee | ||
2454 | * has USED_IN state, which, again, would allow recursion deadlocks. | ||
2455 | */ | ||
2456 | check_usage_f usage = dir ? | ||
2457 | check_usage_backwards : check_usage_forwards; | ||
2458 | |||
2459 | /* | ||
2460 | * Validate that this particular lock does not have conflicting | ||
2461 | * usage states. | ||
2462 | */ | ||
2463 | if (!valid_state(curr, this, new_bit, excl_bit)) | ||
2464 | return 0; | ||
2465 | |||
2466 | /* | ||
2467 | * Validate that the lock dependencies don't have conflicting usage | ||
2468 | * states. | ||
2469 | */ | ||
2470 | if ((!read || !dir || STRICT_READ_CHECKS) && | ||
2471 | !usage(curr, this, excl_bit, state_name(new_bit & ~1))) | ||
2472 | return 0; | ||
2473 | |||
2474 | /* | ||
2475 | * Check for read in write conflicts | ||
2476 | */ | ||
2477 | if (!read) { | ||
2478 | if (!valid_state(curr, this, new_bit, excl_bit + 1)) | ||
2479 | return 0; | ||
2480 | |||
2481 | if (STRICT_READ_CHECKS && | ||
2482 | !usage(curr, this, excl_bit + 1, | ||
2483 | state_name(new_bit + 1))) | ||
2484 | return 0; | ||
2485 | } | ||
2486 | |||
2487 | if (state_verbose(new_bit, hlock_class(this))) | ||
2488 | return 2; | ||
2489 | |||
2490 | return 1; | ||
2491 | } | ||
2492 | |||
2493 | enum mark_type { | ||
2494 | #define LOCKDEP_STATE(__STATE) __STATE, | ||
2495 | #include "lockdep_states.h" | ||
2496 | #undef LOCKDEP_STATE | ||
2497 | }; | ||
2498 | |||
2499 | /* | ||
2500 | * Mark all held locks with a usage bit: | ||
2501 | */ | ||
2502 | static int | ||
2503 | mark_held_locks(struct task_struct *curr, enum mark_type mark) | ||
2504 | { | ||
2505 | enum lock_usage_bit usage_bit; | ||
2506 | struct held_lock *hlock; | ||
2507 | int i; | ||
2508 | |||
2509 | for (i = 0; i < curr->lockdep_depth; i++) { | ||
2510 | hlock = curr->held_locks + i; | ||
2511 | |||
2512 | usage_bit = 2 + (mark << 2); /* ENABLED */ | ||
2513 | if (hlock->read) | ||
2514 | usage_bit += 1; /* READ */ | ||
2515 | |||
2516 | BUG_ON(usage_bit >= LOCK_USAGE_STATES); | ||
2517 | |||
2518 | if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) | ||
2519 | continue; | ||
2520 | |||
2521 | if (!mark_lock(curr, hlock, usage_bit)) | ||
2522 | return 0; | ||
2523 | } | ||
2524 | |||
2525 | return 1; | ||
2526 | } | ||
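/*
 * Worked example of the usage_bit arithmetic above, assuming the usual
 * HARDIRQ = 0, SOFTIRQ = 1, RECLAIM_FS = 2 ordering from lockdep_states.h:
 *
 *   mark == SOFTIRQ, write-held lock: 2 + (1 << 2)     == 6 == LOCK_ENABLED_SOFTIRQ
 *   mark == SOFTIRQ, read-held lock:  2 + (1 << 2) + 1 == 7 == LOCK_ENABLED_SOFTIRQ_READ
 *   mark == HARDIRQ, write-held lock: 2 + (0 << 2)     == 2 == LOCK_ENABLED_HARDIRQ
 */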
2527 | |||
2528 | /* | ||
2529 | * Hardirqs will be enabled: | ||
2530 | */ | ||
2531 | static void __trace_hardirqs_on_caller(unsigned long ip) | ||
2532 | { | ||
2533 | struct task_struct *curr = current; | ||
2534 | |||
2535 | /* we'll do an OFF -> ON transition: */ | ||
2536 | curr->hardirqs_enabled = 1; | ||
2537 | |||
2538 | /* | ||
2539 | * We are going to turn hardirqs on, so set the | ||
2540 | * usage bit for all held locks: | ||
2541 | */ | ||
2542 | if (!mark_held_locks(curr, HARDIRQ)) | ||
2543 | return; | ||
2544 | /* | ||
2545 | * If we have softirqs enabled, then set the usage | ||
2546 | * bit for all held locks. (disabled hardirqs prevented | ||
2547 | * this bit from being set before) | ||
2548 | */ | ||
2549 | if (curr->softirqs_enabled) | ||
2550 | if (!mark_held_locks(curr, SOFTIRQ)) | ||
2551 | return; | ||
2552 | |||
2553 | curr->hardirq_enable_ip = ip; | ||
2554 | curr->hardirq_enable_event = ++curr->irq_events; | ||
2555 | debug_atomic_inc(hardirqs_on_events); | ||
2556 | } | ||
2557 | |||
2558 | void trace_hardirqs_on_caller(unsigned long ip) | ||
2559 | { | ||
2560 | time_hardirqs_on(CALLER_ADDR0, ip); | ||
2561 | |||
2562 | if (unlikely(!debug_locks || current->lockdep_recursion)) | ||
2563 | return; | ||
2564 | |||
2565 | if (unlikely(current->hardirqs_enabled)) { | ||
2566 | /* | ||
2567 | * Neither irqs nor preemption is disabled here, | ||
2568 | * so this is racy by nature but losing one hit | ||
2569 | * in a stat is not a big deal. | ||
2570 | */ | ||
2571 | __debug_atomic_inc(redundant_hardirqs_on); | ||
2572 | return; | ||
2573 | } | ||
2574 | |||
2575 | /* | ||
2576 | * We're enabling irqs and according to our state above irqs weren't | ||
2577 | * already enabled, yet we find the hardware thinks they are in fact | ||
2578 | * enabled.. someone messed up their IRQ state tracing. | ||
2579 | */ | ||
2580 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
2581 | return; | ||
2582 | |||
2583 | /* | ||
2584 | * See the fine text that goes along with this variable definition. | ||
2585 | */ | ||
2586 | if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) | ||
2587 | return; | ||
2588 | |||
2589 | /* | ||
2590 | * Can't allow enabling interrupts while in an interrupt handler, | ||
2591 | * that's general bad form and such. Recursion, limited stack etc.. | ||
2592 | */ | ||
2593 | if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) | ||
2594 | return; | ||
2595 | |||
2596 | current->lockdep_recursion = 1; | ||
2597 | __trace_hardirqs_on_caller(ip); | ||
2598 | current->lockdep_recursion = 0; | ||
2599 | } | ||
2600 | EXPORT_SYMBOL(trace_hardirqs_on_caller); | ||
2601 | |||
2602 | void trace_hardirqs_on(void) | ||
2603 | { | ||
2604 | trace_hardirqs_on_caller(CALLER_ADDR0); | ||
2605 | } | ||
2606 | EXPORT_SYMBOL(trace_hardirqs_on); | ||
2607 | |||
2608 | /* | ||
2609 | * Hardirqs were disabled: | ||
2610 | */ | ||
2611 | void trace_hardirqs_off_caller(unsigned long ip) | ||
2612 | { | ||
2613 | struct task_struct *curr = current; | ||
2614 | |||
2615 | time_hardirqs_off(CALLER_ADDR0, ip); | ||
2616 | |||
2617 | if (unlikely(!debug_locks || current->lockdep_recursion)) | ||
2618 | return; | ||
2619 | |||
2620 | /* | ||
2621 | * So we're supposed to get called after you mask local IRQs, but for | ||
2622 | * some reason the hardware doesn't quite think you did a proper job. | ||
2623 | */ | ||
2624 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
2625 | return; | ||
2626 | |||
2627 | if (curr->hardirqs_enabled) { | ||
2628 | /* | ||
2629 | * We have done an ON -> OFF transition: | ||
2630 | */ | ||
2631 | curr->hardirqs_enabled = 0; | ||
2632 | curr->hardirq_disable_ip = ip; | ||
2633 | curr->hardirq_disable_event = ++curr->irq_events; | ||
2634 | debug_atomic_inc(hardirqs_off_events); | ||
2635 | } else | ||
2636 | debug_atomic_inc(redundant_hardirqs_off); | ||
2637 | } | ||
2638 | EXPORT_SYMBOL(trace_hardirqs_off_caller); | ||
2639 | |||
2640 | void trace_hardirqs_off(void) | ||
2641 | { | ||
2642 | trace_hardirqs_off_caller(CALLER_ADDR0); | ||
2643 | } | ||
2644 | EXPORT_SYMBOL(trace_hardirqs_off); | ||
2645 | |||
2646 | /* | ||
2647 | * Softirqs will be enabled: | ||
2648 | */ | ||
2649 | void trace_softirqs_on(unsigned long ip) | ||
2650 | { | ||
2651 | struct task_struct *curr = current; | ||
2652 | |||
2653 | if (unlikely(!debug_locks || current->lockdep_recursion)) | ||
2654 | return; | ||
2655 | |||
2656 | /* | ||
2657 | * We fancy IRQs being disabled here, see softirq.c, avoids | ||
2658 | * funny state and nesting things. | ||
2659 | */ | ||
2660 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
2661 | return; | ||
2662 | |||
2663 | if (curr->softirqs_enabled) { | ||
2664 | debug_atomic_inc(redundant_softirqs_on); | ||
2665 | return; | ||
2666 | } | ||
2667 | |||
2668 | current->lockdep_recursion = 1; | ||
2669 | /* | ||
2670 | * We'll do an OFF -> ON transition: | ||
2671 | */ | ||
2672 | curr->softirqs_enabled = 1; | ||
2673 | curr->softirq_enable_ip = ip; | ||
2674 | curr->softirq_enable_event = ++curr->irq_events; | ||
2675 | debug_atomic_inc(softirqs_on_events); | ||
2676 | /* | ||
2677 | * We are going to turn softirqs on, so set the | ||
2678 | * usage bit for all held locks, if hardirqs are | ||
2679 | * enabled too: | ||
2680 | */ | ||
2681 | if (curr->hardirqs_enabled) | ||
2682 | mark_held_locks(curr, SOFTIRQ); | ||
2683 | current->lockdep_recursion = 0; | ||
2684 | } | ||
2685 | |||
2686 | /* | ||
2687 | * Softirqs were disabled: | ||
2688 | */ | ||
2689 | void trace_softirqs_off(unsigned long ip) | ||
2690 | { | ||
2691 | struct task_struct *curr = current; | ||
2692 | |||
2693 | if (unlikely(!debug_locks || current->lockdep_recursion)) | ||
2694 | return; | ||
2695 | |||
2696 | /* | ||
2697 | * We fancy IRQs being disabled here, see softirq.c | ||
2698 | */ | ||
2699 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
2700 | return; | ||
2701 | |||
2702 | if (curr->softirqs_enabled) { | ||
2703 | /* | ||
2704 | * We have done an ON -> OFF transition: | ||
2705 | */ | ||
2706 | curr->softirqs_enabled = 0; | ||
2707 | curr->softirq_disable_ip = ip; | ||
2708 | curr->softirq_disable_event = ++curr->irq_events; | ||
2709 | debug_atomic_inc(softirqs_off_events); | ||
2710 | /* | ||
2711 | * Whoops, we wanted softirqs off, so why aren't they? | ||
2712 | */ | ||
2713 | DEBUG_LOCKS_WARN_ON(!softirq_count()); | ||
2714 | } else | ||
2715 | debug_atomic_inc(redundant_softirqs_off); | ||
2716 | } | ||
2717 | |||
2718 | static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) | ||
2719 | { | ||
2720 | struct task_struct *curr = current; | ||
2721 | |||
2722 | if (unlikely(!debug_locks)) | ||
2723 | return; | ||
2724 | |||
2725 | /* no reclaim without waiting on it */ | ||
2726 | if (!(gfp_mask & __GFP_WAIT)) | ||
2727 | return; | ||
2728 | |||
2729 | /* this guy won't enter reclaim */ | ||
2730 | if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) | ||
2731 | return; | ||
2732 | |||
2733 | /* We're only interested in __GFP_FS allocations for now */ | ||
2734 | if (!(gfp_mask & __GFP_FS)) | ||
2735 | return; | ||
2736 | |||
2737 | /* | ||
2738 | * Oi! Can't be having __GFP_FS allocations with IRQs disabled. | ||
2739 | */ | ||
2740 | if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) | ||
2741 | return; | ||
2742 | |||
2743 | mark_held_locks(curr, RECLAIM_FS); | ||
2744 | } | ||
2745 | |||
2746 | static void check_flags(unsigned long flags); | ||
2747 | |||
2748 | void lockdep_trace_alloc(gfp_t gfp_mask) | ||
2749 | { | ||
2750 | unsigned long flags; | ||
2751 | |||
2752 | if (unlikely(current->lockdep_recursion)) | ||
2753 | return; | ||
2754 | |||
2755 | raw_local_irq_save(flags); | ||
2756 | check_flags(flags); | ||
2757 | current->lockdep_recursion = 1; | ||
2758 | __lockdep_trace_alloc(gfp_mask, flags); | ||
2759 | current->lockdep_recursion = 0; | ||
2760 | raw_local_irq_restore(flags); | ||
2761 | } | ||
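/*
 * A sketch of the reclaim deadlock the RECLAIM_FS marking catches, with an
 * invented filesystem lock:
 *
 *   fs path:        mutex_lock(&fs_lock);
 *                   kmalloc(sz, GFP_KERNEL);   // __GFP_FS set, may enter reclaim
 *   reclaim path:   ...
 *                   mutex_lock(&fs_lock);      // same lock taken during reclaim
 *
 * Holding fs_lock over a __GFP_FS allocation can therefore self-deadlock;
 * allocating with GFP_NOFS while fs_lock is held avoids both the report
 * and the deadlock.
 */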
2762 | |||
2763 | static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) | ||
2764 | { | ||
2765 | /* | ||
2766 | * If non-trylock use in a hardirq or softirq context, then | ||
2767 | * mark the lock as used in these contexts: | ||
2768 | */ | ||
2769 | if (!hlock->trylock) { | ||
2770 | if (hlock->read) { | ||
2771 | if (curr->hardirq_context) | ||
2772 | if (!mark_lock(curr, hlock, | ||
2773 | LOCK_USED_IN_HARDIRQ_READ)) | ||
2774 | return 0; | ||
2775 | if (curr->softirq_context) | ||
2776 | if (!mark_lock(curr, hlock, | ||
2777 | LOCK_USED_IN_SOFTIRQ_READ)) | ||
2778 | return 0; | ||
2779 | } else { | ||
2780 | if (curr->hardirq_context) | ||
2781 | if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) | ||
2782 | return 0; | ||
2783 | if (curr->softirq_context) | ||
2784 | if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) | ||
2785 | return 0; | ||
2786 | } | ||
2787 | } | ||
2788 | if (!hlock->hardirqs_off) { | ||
2789 | if (hlock->read) { | ||
2790 | if (!mark_lock(curr, hlock, | ||
2791 | LOCK_ENABLED_HARDIRQ_READ)) | ||
2792 | return 0; | ||
2793 | if (curr->softirqs_enabled) | ||
2794 | if (!mark_lock(curr, hlock, | ||
2795 | LOCK_ENABLED_SOFTIRQ_READ)) | ||
2796 | return 0; | ||
2797 | } else { | ||
2798 | if (!mark_lock(curr, hlock, | ||
2799 | LOCK_ENABLED_HARDIRQ)) | ||
2800 | return 0; | ||
2801 | if (curr->softirqs_enabled) | ||
2802 | if (!mark_lock(curr, hlock, | ||
2803 | LOCK_ENABLED_SOFTIRQ)) | ||
2804 | return 0; | ||
2805 | } | ||
2806 | } | ||
2807 | |||
2808 | /* | ||
2809 | * We reuse the irq context infrastructure more broadly as general | ||
2810 | * context-checking code. This tests GFP_FS recursion (a lock taken | ||
2811 | * during reclaim for a GFP_FS allocation is held over a GFP_FS | ||
2812 | * allocation). | ||
2813 | */ | ||
2814 | if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) { | ||
2815 | if (hlock->read) { | ||
2816 | if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ)) | ||
2817 | return 0; | ||
2818 | } else { | ||
2819 | if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS)) | ||
2820 | return 0; | ||
2821 | } | ||
2822 | } | ||
2823 | |||
2824 | return 1; | ||
2825 | } | ||
2826 | |||
2827 | static int separate_irq_context(struct task_struct *curr, | ||
2828 | struct held_lock *hlock) | ||
2829 | { | ||
2830 | unsigned int depth = curr->lockdep_depth; | ||
2831 | |||
2832 | /* | ||
2833 | * Keep track of points where we cross into an interrupt context: | ||
2834 | */ | ||
2835 | hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + | ||
2836 | curr->softirq_context; | ||
2837 | if (depth) { | ||
2838 | struct held_lock *prev_hlock; | ||
2839 | |||
2840 | prev_hlock = curr->held_locks + depth-1; | ||
2841 | /* | ||
2842 | * If we cross into another context, reset the | ||
2843 | * hash key (this also prevents the checking and the | ||
2844 | * adding of the dependency to 'prev'): | ||
2845 | */ | ||
2846 | if (prev_hlock->irq_context != hlock->irq_context) | ||
2847 | return 1; | ||
2848 | } | ||
2849 | return 0; | ||
2850 | } | ||
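/*
 * With the 2 * hardirq + softirq encoding above, hlock->irq_context is
 * typically 0 in plain task context, 1 inside a softirq and 2 inside a
 * hardirq handler.  A change in this value between neighbouring held locks
 * is what makes check_prevs_add() and the chain-key computation stop at
 * the context boundary.
 */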
2851 | |||
2852 | #else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ | ||
2853 | |||
2854 | static inline | ||
2855 | int mark_lock_irq(struct task_struct *curr, struct held_lock *this, | ||
2856 | enum lock_usage_bit new_bit) | ||
2857 | { | ||
2858 | WARN_ON(1); /* Impossible innit? We don't have TRACE_IRQFLAGS. */ | ||
2859 | return 1; | ||
2860 | } | ||
2861 | |||
2862 | static inline int mark_irqflags(struct task_struct *curr, | ||
2863 | struct held_lock *hlock) | ||
2864 | { | ||
2865 | return 1; | ||
2866 | } | ||
2867 | |||
2868 | static inline int separate_irq_context(struct task_struct *curr, | ||
2869 | struct held_lock *hlock) | ||
2870 | { | ||
2871 | return 0; | ||
2872 | } | ||
2873 | |||
2874 | void lockdep_trace_alloc(gfp_t gfp_mask) | ||
2875 | { | ||
2876 | } | ||
2877 | |||
2878 | #endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ | ||
2879 | |||
2880 | /* | ||
2881 | * Mark a lock with a usage bit, and validate the state transition: | ||
2882 | */ | ||
2883 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
2884 | enum lock_usage_bit new_bit) | ||
2885 | { | ||
2886 | unsigned int new_mask = 1 << new_bit, ret = 1; | ||
2887 | |||
2888 | /* | ||
2889 | * If already set then do not dirty the cacheline, | ||
2890 | * nor do any checks: | ||
2891 | */ | ||
2892 | if (likely(hlock_class(this)->usage_mask & new_mask)) | ||
2893 | return 1; | ||
2894 | |||
2895 | if (!graph_lock()) | ||
2896 | return 0; | ||
2897 | /* | ||
2898 | * Make sure we didn't race: | ||
2899 | */ | ||
2900 | if (unlikely(hlock_class(this)->usage_mask & new_mask)) { | ||
2901 | graph_unlock(); | ||
2902 | return 1; | ||
2903 | } | ||
2904 | |||
2905 | hlock_class(this)->usage_mask |= new_mask; | ||
2906 | |||
2907 | if (!save_trace(hlock_class(this)->usage_traces + new_bit)) | ||
2908 | return 0; | ||
2909 | |||
2910 | switch (new_bit) { | ||
2911 | #define LOCKDEP_STATE(__STATE) \ | ||
2912 | case LOCK_USED_IN_##__STATE: \ | ||
2913 | case LOCK_USED_IN_##__STATE##_READ: \ | ||
2914 | case LOCK_ENABLED_##__STATE: \ | ||
2915 | case LOCK_ENABLED_##__STATE##_READ: | ||
2916 | #include "lockdep_states.h" | ||
2917 | #undef LOCKDEP_STATE | ||
2918 | ret = mark_lock_irq(curr, this, new_bit); | ||
2919 | if (!ret) | ||
2920 | return 0; | ||
2921 | break; | ||
2922 | case LOCK_USED: | ||
2923 | debug_atomic_dec(nr_unused_locks); | ||
2924 | break; | ||
2925 | default: | ||
2926 | if (!debug_locks_off_graph_unlock()) | ||
2927 | return 0; | ||
2928 | WARN_ON(1); | ||
2929 | return 0; | ||
2930 | } | ||
2931 | |||
2932 | graph_unlock(); | ||
2933 | |||
2934 | /* | ||
2935 | * We must printk outside of the graph_lock: | ||
2936 | */ | ||
2937 | if (ret == 2) { | ||
2938 | printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); | ||
2939 | print_lock(this); | ||
2940 | print_irqtrace_events(curr); | ||
2941 | dump_stack(); | ||
2942 | } | ||
2943 | |||
2944 | return ret; | ||
2945 | } | ||
2946 | |||
2947 | /* | ||
2948 | * Initialize a lock instance's lock-class mapping info: | ||
2949 | */ | ||
2950 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | ||
2951 | struct lock_class_key *key, int subclass) | ||
2952 | { | ||
2953 | int i; | ||
2954 | |||
2955 | kmemcheck_mark_initialized(lock, sizeof(*lock)); | ||
2956 | |||
2957 | for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) | ||
2958 | lock->class_cache[i] = NULL; | ||
2959 | |||
2960 | #ifdef CONFIG_LOCK_STAT | ||
2961 | lock->cpu = raw_smp_processor_id(); | ||
2962 | #endif | ||
2963 | |||
2964 | /* | ||
2965 | * Can't be having no nameless bastards around this place! | ||
2966 | */ | ||
2967 | if (DEBUG_LOCKS_WARN_ON(!name)) { | ||
2968 | lock->name = "NULL"; | ||
2969 | return; | ||
2970 | } | ||
2971 | |||
2972 | lock->name = name; | ||
2973 | |||
2974 | /* | ||
2975 | * No key, no joy, we need to hash something. | ||
2976 | */ | ||
2977 | if (DEBUG_LOCKS_WARN_ON(!key)) | ||
2978 | return; | ||
2979 | /* | ||
2980 | * Sanity check, the lock-class key must be persistent: | ||
2981 | */ | ||
2982 | if (!static_obj(key)) { | ||
2983 | printk("BUG: key %p not in .data!\n", key); | ||
2984 | /* | ||
2985 | * What it says above ^^^^^, I suggest you read it. | ||
2986 | */ | ||
2987 | DEBUG_LOCKS_WARN_ON(1); | ||
2988 | return; | ||
2989 | } | ||
2990 | lock->key = key; | ||
2991 | |||
2992 | if (unlikely(!debug_locks)) | ||
2993 | return; | ||
2994 | |||
2995 | if (subclass) | ||
2996 | register_lock_class(lock, subclass, 1); | ||
2997 | } | ||
2998 | EXPORT_SYMBOL_GPL(lockdep_init_map); | ||
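/*
 * Typical use, sketched with invented names (the key must live in static
 * storage so that static_obj() accepts it):
 *
 *   static struct lock_class_key my_key;
 *   struct lockdep_map my_map;
 *
 *   lockdep_init_map(&my_map, "my_map", &my_key, 0);
 *
 * This is what the spin_lock_init()/mutex_init() style wrappers boil down
 * to, each call site supplying its own static key.
 */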
2999 | |||
3000 | struct lock_class_key __lockdep_no_validate__; | ||
3001 | EXPORT_SYMBOL_GPL(__lockdep_no_validate__); | ||
3002 | |||
3003 | static int | ||
3004 | print_lock_nested_lock_not_held(struct task_struct *curr, | ||
3005 | struct held_lock *hlock, | ||
3006 | unsigned long ip) | ||
3007 | { | ||
3008 | if (!debug_locks_off()) | ||
3009 | return 0; | ||
3010 | if (debug_locks_silent) | ||
3011 | return 0; | ||
3012 | |||
3013 | printk("\n"); | ||
3014 | printk("==================================\n"); | ||
3015 | printk("[ BUG: Nested lock was not taken ]\n"); | ||
3016 | print_kernel_ident(); | ||
3017 | printk("----------------------------------\n"); | ||
3018 | |||
3019 | printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr)); | ||
3020 | print_lock(hlock); | ||
3021 | |||
3022 | printk("\nbut this task is not holding:\n"); | ||
3023 | printk("%s\n", hlock->nest_lock->name); | ||
3024 | |||
3025 | printk("\nstack backtrace:\n"); | ||
3026 | dump_stack(); | ||
3027 | |||
3028 | printk("\nother info that might help us debug this:\n"); | ||
3029 | lockdep_print_held_locks(curr); | ||
3030 | |||
3031 | printk("\nstack backtrace:\n"); | ||
3032 | dump_stack(); | ||
3033 | |||
3034 | return 0; | ||
3035 | } | ||
3036 | |||
3037 | static int __lock_is_held(struct lockdep_map *lock); | ||
3038 | |||
3039 | /* | ||
3040 | * This gets called for every mutex_lock*()/spin_lock*() operation. | ||
3041 | * We maintain the dependency maps and validate the locking attempt: | ||
3042 | */ | ||
3043 | static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | ||
3044 | int trylock, int read, int check, int hardirqs_off, | ||
3045 | struct lockdep_map *nest_lock, unsigned long ip, | ||
3046 | int references) | ||
3047 | { | ||
3048 | struct task_struct *curr = current; | ||
3049 | struct lock_class *class = NULL; | ||
3050 | struct held_lock *hlock; | ||
3051 | unsigned int depth, id; | ||
3052 | int chain_head = 0; | ||
3053 | int class_idx; | ||
3054 | u64 chain_key; | ||
3055 | |||
3056 | if (!prove_locking) | ||
3057 | check = 1; | ||
3058 | |||
3059 | if (unlikely(!debug_locks)) | ||
3060 | return 0; | ||
3061 | |||
3062 | /* | ||
3063 | * Lockdep should run with IRQs disabled, otherwise we could | ||
3064 | * get an interrupt which would want to take locks, which would | ||
3065 | * end up in lockdep and have you got a head-ache already? | ||
3066 | */ | ||
3067 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
3068 | return 0; | ||
3069 | |||
3070 | if (lock->key == &__lockdep_no_validate__) | ||
3071 | check = 1; | ||
3072 | |||
3073 | if (subclass < NR_LOCKDEP_CACHING_CLASSES) | ||
3074 | class = lock->class_cache[subclass]; | ||
3075 | /* | ||
3076 | * Not cached? | ||
3077 | */ | ||
3078 | if (unlikely(!class)) { | ||
3079 | class = register_lock_class(lock, subclass, 0); | ||
3080 | if (!class) | ||
3081 | return 0; | ||
3082 | } | ||
3083 | atomic_inc((atomic_t *)&class->ops); | ||
3084 | if (very_verbose(class)) { | ||
3085 | printk("\nacquire class [%p] %s", class->key, class->name); | ||
3086 | if (class->name_version > 1) | ||
3087 | printk("#%d", class->name_version); | ||
3088 | printk("\n"); | ||
3089 | dump_stack(); | ||
3090 | } | ||
3091 | |||
3092 | /* | ||
3093 | * Add the lock to the list of currently held locks. | ||
3094 | * (we don't increase the depth just yet, up until the | ||
3095 | * dependency checks are done) | ||
3096 | */ | ||
3097 | depth = curr->lockdep_depth; | ||
3098 | /* | ||
3099 | * Ran out of static storage for our per-task lock stack again have we? | ||
3100 | */ | ||
3101 | if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) | ||
3102 | return 0; | ||
3103 | |||
3104 | class_idx = class - lock_classes + 1; | ||
3105 | |||
3106 | if (depth) { | ||
3107 | hlock = curr->held_locks + depth - 1; | ||
3108 | if (hlock->class_idx == class_idx && nest_lock) { | ||
3109 | if (hlock->references) | ||
3110 | hlock->references++; | ||
3111 | else | ||
3112 | hlock->references = 2; | ||
3113 | |||
3114 | return 1; | ||
3115 | } | ||
3116 | } | ||
3117 | |||
3118 | hlock = curr->held_locks + depth; | ||
3119 | /* | ||
3120 | * Plain impossible, we just registered it and checked it weren't no | ||
3121 | * NULL like.. I bet this mushroom I ate was good! | ||
3122 | */ | ||
3123 | if (DEBUG_LOCKS_WARN_ON(!class)) | ||
3124 | return 0; | ||
3125 | hlock->class_idx = class_idx; | ||
3126 | hlock->acquire_ip = ip; | ||
3127 | hlock->instance = lock; | ||
3128 | hlock->nest_lock = nest_lock; | ||
3129 | hlock->trylock = trylock; | ||
3130 | hlock->read = read; | ||
3131 | hlock->check = check; | ||
3132 | hlock->hardirqs_off = !!hardirqs_off; | ||
3133 | hlock->references = references; | ||
3134 | #ifdef CONFIG_LOCK_STAT | ||
3135 | hlock->waittime_stamp = 0; | ||
3136 | hlock->holdtime_stamp = lockstat_clock(); | ||
3137 | #endif | ||
3138 | |||
3139 | if (check == 2 && !mark_irqflags(curr, hlock)) | ||
3140 | return 0; | ||
3141 | |||
3142 | /* mark it as used: */ | ||
3143 | if (!mark_lock(curr, hlock, LOCK_USED)) | ||
3144 | return 0; | ||
3145 | |||
3146 | /* | ||
3147 | * Calculate the chain hash: it's the combined hash of all the | ||
3148 | * lock keys along the dependency chain. We save the hash value | ||
3149 | * at every step so that we can get the current hash easily | ||
3150 | * after unlock. The chain hash is then used to cache dependency | ||
3151 | * results. | ||
3152 | * | ||
3153 | * The 'key ID' is the most compact key value we can use to drive | ||
3154 | * the hash, rather than class->key itself. | ||
3155 | */ | ||
3156 | id = class - lock_classes; | ||
3157 | /* | ||
3158 | * Whoops, we did it again.. ran straight out of our static allocation. | ||
3159 | */ | ||
3160 | if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) | ||
3161 | return 0; | ||
3162 | |||
3163 | chain_key = curr->curr_chain_key; | ||
3164 | if (!depth) { | ||
3165 | /* | ||
3166 | * How can we have a chain hash when we ain't got no keys?! | ||
3167 | */ | ||
3168 | if (DEBUG_LOCKS_WARN_ON(chain_key != 0)) | ||
3169 | return 0; | ||
3170 | chain_head = 1; | ||
3171 | } | ||
3172 | |||
3173 | hlock->prev_chain_key = chain_key; | ||
3174 | if (separate_irq_context(curr, hlock)) { | ||
3175 | chain_key = 0; | ||
3176 | chain_head = 1; | ||
3177 | } | ||
3178 | chain_key = iterate_chain_key(chain_key, id); | ||
3179 | |||
3180 | if (nest_lock && !__lock_is_held(nest_lock)) | ||
3181 | return print_lock_nested_lock_not_held(curr, hlock, ip); | ||
3182 | |||
3183 | if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) | ||
3184 | return 0; | ||
3185 | |||
3186 | curr->curr_chain_key = chain_key; | ||
3187 | curr->lockdep_depth++; | ||
3188 | check_chain_key(curr); | ||
3189 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
3190 | if (unlikely(!debug_locks)) | ||
3191 | return 0; | ||
3192 | #endif | ||
3193 | if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { | ||
3194 | debug_locks_off(); | ||
3195 | print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!"); | ||
3196 | printk(KERN_DEBUG "depth: %i max: %lu!\n", | ||
3197 | curr->lockdep_depth, MAX_LOCK_DEPTH); | ||
3198 | |||
3199 | lockdep_print_held_locks(current); | ||
3200 | debug_show_all_locks(); | ||
3201 | dump_stack(); | ||
3202 | |||
3203 | return 0; | ||
3204 | } | ||
3205 | |||
3206 | if (unlikely(curr->lockdep_depth > max_lockdep_depth)) | ||
3207 | max_lockdep_depth = curr->lockdep_depth; | ||
3208 | |||
3209 | return 1; | ||
3210 | } | ||
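The chain-hash comment in __lock_acquire() is easier to follow with a toy model: the running 64-bit key is mixed with each class id in acquisition order, so an identical sequence of lock classes always yields the same chain key, which validate_chain() can then cache. The mixing function below is illustrative only, not the in-tree iterate_chain_key():

	/* toy model of the chain-key folding; not the real iterate_chain_key() */
	static u64 toy_mix_chain_key(u64 chain_key, unsigned int class_id)
	{
		/* rotate the running key and fold in the new class id */
		return ((chain_key << 13) | (chain_key >> 51)) ^ class_id;
	}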
3211 | |||
3212 | static int | ||
3213 | print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, | ||
3214 | unsigned long ip) | ||
3215 | { | ||
3216 | if (!debug_locks_off()) | ||
3217 | return 0; | ||
3218 | if (debug_locks_silent) | ||
3219 | return 0; | ||
3220 | |||
3221 | printk("\n"); | ||
3222 | printk("=====================================\n"); | ||
3223 | printk("[ BUG: bad unlock balance detected! ]\n"); | ||
3224 | print_kernel_ident(); | ||
3225 | printk("-------------------------------------\n"); | ||
3226 | printk("%s/%d is trying to release lock (", | ||
3227 | curr->comm, task_pid_nr(curr)); | ||
3228 | print_lockdep_cache(lock); | ||
3229 | printk(") at:\n"); | ||
3230 | print_ip_sym(ip); | ||
3231 | printk("but there are no more locks to release!\n"); | ||
3232 | printk("\nother info that might help us debug this:\n"); | ||
3233 | lockdep_print_held_locks(curr); | ||
3234 | |||
3235 | printk("\nstack backtrace:\n"); | ||
3236 | dump_stack(); | ||
3237 | |||
3238 | return 0; | ||
3239 | } | ||
3240 | |||
3241 | /* | ||
3242 | * Common debugging checks for both nested and non-nested unlock: | ||
3243 | */ | ||
3244 | static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, | ||
3245 | unsigned long ip) | ||
3246 | { | ||
3247 | if (unlikely(!debug_locks)) | ||
3248 | return 0; | ||
3249 | /* | ||
3250 | * Lockdep should run with IRQs disabled, recursion, head-ache, etc.. | ||
3251 | */ | ||
3252 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
3253 | return 0; | ||
3254 | |||
3255 | if (curr->lockdep_depth <= 0) | ||
3256 | return print_unlock_imbalance_bug(curr, lock, ip); | ||
3257 | |||
3258 | return 1; | ||
3259 | } | ||
3260 | |||
3261 | static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) | ||
3262 | { | ||
3263 | if (hlock->instance == lock) | ||
3264 | return 1; | ||
3265 | |||
3266 | if (hlock->references) { | ||
3267 | struct lock_class *class = lock->class_cache[0]; | ||
3268 | |||
3269 | if (!class) | ||
3270 | class = look_up_lock_class(lock, 0); | ||
3271 | |||
3272 | /* | ||
3273 | * If look_up_lock_class() failed to find a class, we're trying | ||
3274 | * to test if we hold a lock that has never yet been acquired. | ||
3275 | * Clearly if the lock hasn't been acquired _ever_, we're not | ||
3276 | * holding it either, so report failure. | ||
3277 | */ | ||
3278 | if (!class) | ||
3279 | return 0; | ||
3280 | |||
3281 | /* | ||
3282 | * References, but not a lock we're actually ref-counting? | ||
3283 | * State got messed up, follow the sites that change ->references | ||
3284 | * and try to make sense of it. | ||
3285 | */ | ||
3286 | if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) | ||
3287 | return 0; | ||
3288 | |||
3289 | if (hlock->class_idx == class - lock_classes + 1) | ||
3290 | return 1; | ||
3291 | } | ||
3292 | |||
3293 | return 0; | ||
3294 | } | ||
3295 | |||
3296 | static int | ||
3297 | __lock_set_class(struct lockdep_map *lock, const char *name, | ||
3298 | struct lock_class_key *key, unsigned int subclass, | ||
3299 | unsigned long ip) | ||
3300 | { | ||
3301 | struct task_struct *curr = current; | ||
3302 | struct held_lock *hlock, *prev_hlock; | ||
3303 | struct lock_class *class; | ||
3304 | unsigned int depth; | ||
3305 | int i; | ||
3306 | |||
3307 | depth = curr->lockdep_depth; | ||
3308 | /* | ||
3309 | * This function is about (re)setting the class of a held lock, | ||
3310 | * yet we're not actually holding any locks. Naughty user! | ||
3311 | */ | ||
3312 | if (DEBUG_LOCKS_WARN_ON(!depth)) | ||
3313 | return 0; | ||
3314 | |||
3315 | prev_hlock = NULL; | ||
3316 | for (i = depth-1; i >= 0; i--) { | ||
3317 | hlock = curr->held_locks + i; | ||
3318 | /* | ||
3319 | * We must not cross into another context: | ||
3320 | */ | ||
3321 | if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) | ||
3322 | break; | ||
3323 | if (match_held_lock(hlock, lock)) | ||
3324 | goto found_it; | ||
3325 | prev_hlock = hlock; | ||
3326 | } | ||
3327 | return print_unlock_imbalance_bug(curr, lock, ip); | ||
3328 | |||
3329 | found_it: | ||
3330 | lockdep_init_map(lock, name, key, 0); | ||
3331 | class = register_lock_class(lock, subclass, 0); | ||
3332 | hlock->class_idx = class - lock_classes + 1; | ||
3333 | |||
3334 | curr->lockdep_depth = i; | ||
3335 | curr->curr_chain_key = hlock->prev_chain_key; | ||
3336 | |||
3337 | for (; i < depth; i++) { | ||
3338 | hlock = curr->held_locks + i; | ||
3339 | if (!__lock_acquire(hlock->instance, | ||
3340 | hlock_class(hlock)->subclass, hlock->trylock, | ||
3341 | hlock->read, hlock->check, hlock->hardirqs_off, | ||
3342 | hlock->nest_lock, hlock->acquire_ip, | ||
3343 | hlock->references)) | ||
3344 | return 0; | ||
3345 | } | ||
3346 | |||
3347 | /* | ||
3348 | * I took it apart and put it back together again, except now I have | ||
3349 | * these 'spare' parts.. where shall I put them. | ||
3350 | */ | ||
3351 | if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth)) | ||
3352 | return 0; | ||
3353 | return 1; | ||
3354 | } | ||
3355 | |||
3356 | /* | ||
3357 | * Remove the lock from the list of currently held locks in a | ||
3358 | * potentially non-nested (out of order) manner. This is a | ||
3359 | * relatively rare operation, as all the unlock APIs default | ||
3360 | * to nested mode (which uses lock_release()): | ||
3361 | */ | ||
3362 | static int | ||
3363 | lock_release_non_nested(struct task_struct *curr, | ||
3364 | struct lockdep_map *lock, unsigned long ip) | ||
3365 | { | ||
3366 | struct held_lock *hlock, *prev_hlock; | ||
3367 | unsigned int depth; | ||
3368 | int i; | ||
3369 | |||
3370 | /* | ||
3371 | * Check whether the lock exists in the current stack | ||
3372 | * of held locks: | ||
3373 | */ | ||
3374 | depth = curr->lockdep_depth; | ||
3375 | /* | ||
3376 | * So we're all set to release this lock.. wait what lock? We don't | ||
3377 | * own any locks, you've been drinking again? | ||
3378 | */ | ||
3379 | if (DEBUG_LOCKS_WARN_ON(!depth)) | ||
3380 | return 0; | ||
3381 | |||
3382 | prev_hlock = NULL; | ||
3383 | for (i = depth-1; i >= 0; i--) { | ||
3384 | hlock = curr->held_locks + i; | ||
3385 | /* | ||
3386 | * We must not cross into another context: | ||
3387 | */ | ||
3388 | if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) | ||
3389 | break; | ||
3390 | if (match_held_lock(hlock, lock)) | ||
3391 | goto found_it; | ||
3392 | prev_hlock = hlock; | ||
3393 | } | ||
3394 | return print_unlock_imbalance_bug(curr, lock, ip); | ||
3395 | |||
3396 | found_it: | ||
3397 | if (hlock->instance == lock) | ||
3398 | lock_release_holdtime(hlock); | ||
3399 | |||
3400 | if (hlock->references) { | ||
3401 | hlock->references--; | ||
3402 | if (hlock->references) { | ||
3403 | /* | ||
3404 | * We had, and after removing one, still have | ||
3405 | * references, the current lock stack is still | ||
3406 | * valid. We're done! | ||
3407 | */ | ||
3408 | return 1; | ||
3409 | } | ||
3410 | } | ||
3411 | |||
3412 | /* | ||
3413 | * We have the right lock to unlock, 'hlock' points to it. | ||
3414 | * Now we remove it from the stack, and add back the other | ||
3415 | * entries (if any), recalculating the hash along the way: | ||
3416 | */ | ||
3417 | |||
3418 | curr->lockdep_depth = i; | ||
3419 | curr->curr_chain_key = hlock->prev_chain_key; | ||
3420 | |||
3421 | for (i++; i < depth; i++) { | ||
3422 | hlock = curr->held_locks + i; | ||
3423 | if (!__lock_acquire(hlock->instance, | ||
3424 | hlock_class(hlock)->subclass, hlock->trylock, | ||
3425 | hlock->read, hlock->check, hlock->hardirqs_off, | ||
3426 | hlock->nest_lock, hlock->acquire_ip, | ||
3427 | hlock->references)) | ||
3428 | return 0; | ||
3429 | } | ||
3430 | |||
3431 | /* | ||
3432 | * We had N bottles of beer on the wall, we drank one, but now | ||
3433 | * there's not N-1 bottles of beer left on the wall... | ||
3434 | */ | ||
3435 | if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1)) | ||
3436 | return 0; | ||
3437 | return 1; | ||
3438 | } | ||
3439 | |||
3440 | /* | ||
3441 | * Remove the lock from the list of currently held locks - this gets | ||
3442 | * called on mutex_unlock()/spin_unlock*() (or on a failed | ||
3443 | * mutex_lock_interruptible()). This is done for unlocks that nest | ||
3444 | * perfectly. (i.e. the current top of the lock-stack is unlocked) | ||
3445 | */ | ||
3446 | static int lock_release_nested(struct task_struct *curr, | ||
3447 | struct lockdep_map *lock, unsigned long ip) | ||
3448 | { | ||
3449 | struct held_lock *hlock; | ||
3450 | unsigned int depth; | ||
3451 | |||
3452 | /* | ||
3453 | * Pop off the top of the lock stack: | ||
3454 | */ | ||
3455 | depth = curr->lockdep_depth - 1; | ||
3456 | hlock = curr->held_locks + depth; | ||
3457 | |||
3458 | /* | ||
3459 | * Is the unlock non-nested: | ||
3460 | */ | ||
3461 | if (hlock->instance != lock || hlock->references) | ||
3462 | return lock_release_non_nested(curr, lock, ip); | ||
3463 | curr->lockdep_depth--; | ||
3464 | |||
3465 | /* | ||
3466 | * No more locks, but somehow we've got hash left over, who left it? | ||
3467 | */ | ||
3468 | if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0))) | ||
3469 | return 0; | ||
3470 | |||
3471 | curr->curr_chain_key = hlock->prev_chain_key; | ||
3472 | |||
3473 | lock_release_holdtime(hlock); | ||
3474 | |||
3475 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
3476 | hlock->prev_chain_key = 0; | ||
3477 | hlock->class_idx = 0; | ||
3478 | hlock->acquire_ip = 0; | ||
3479 | hlock->irq_context = 0; | ||
3480 | #endif | ||
3481 | return 1; | ||
3482 | } | ||
3483 | |||
3484 | /* | ||
3485 | * Remove the lock from the list of currently held locks - this gets | ||
3486 | * called on mutex_unlock()/spin_unlock*() (or on a failed | ||
3487 | * mutex_lock_interruptible()). This is done for unlocks that nest | ||
3488 | * perfectly. (i.e. the current top of the lock-stack is unlocked) | ||
3489 | */ | ||
3490 | static void | ||
3491 | __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) | ||
3492 | { | ||
3493 | struct task_struct *curr = current; | ||
3494 | |||
3495 | if (!check_unlock(curr, lock, ip)) | ||
3496 | return; | ||
3497 | |||
3498 | if (nested) { | ||
3499 | if (!lock_release_nested(curr, lock, ip)) | ||
3500 | return; | ||
3501 | } else { | ||
3502 | if (!lock_release_non_nested(curr, lock, ip)) | ||
3503 | return; | ||
3504 | } | ||
3505 | |||
3506 | check_chain_key(curr); | ||
3507 | } | ||
3508 | |||
3509 | static int __lock_is_held(struct lockdep_map *lock) | ||
3510 | { | ||
3511 | struct task_struct *curr = current; | ||
3512 | int i; | ||
3513 | |||
3514 | for (i = 0; i < curr->lockdep_depth; i++) { | ||
3515 | struct held_lock *hlock = curr->held_locks + i; | ||
3516 | |||
3517 | if (match_held_lock(hlock, lock)) | ||
3518 | return 1; | ||
3519 | } | ||
3520 | |||
3521 | return 0; | ||
3522 | } | ||
3523 | |||
3524 | /* | ||
3525 | * Check whether we follow the irq-flags state precisely: | ||
3526 | */ | ||
3527 | static void check_flags(unsigned long flags) | ||
3528 | { | ||
3529 | #if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) && \ | ||
3530 | defined(CONFIG_TRACE_IRQFLAGS) | ||
3531 | if (!debug_locks) | ||
3532 | return; | ||
3533 | |||
3534 | if (irqs_disabled_flags(flags)) { | ||
3535 | if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) { | ||
3536 | printk("possible reason: unannotated irqs-off.\n"); | ||
3537 | } | ||
3538 | } else { | ||
3539 | if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) { | ||
3540 | printk("possible reason: unannotated irqs-on.\n"); | ||
3541 | } | ||
3542 | } | ||
3543 | |||
3544 | /* | ||
3545 | * We don't accurately track softirq state in e.g. | ||
3546 | * hardirq contexts (such as on 4KSTACKS), so only | ||
3547 | * check if not in hardirq contexts: | ||
3548 | */ | ||
3549 | if (!hardirq_count()) { | ||
3550 | if (softirq_count()) { | ||
3551 | /* like the above, but with softirqs */ | ||
3552 | DEBUG_LOCKS_WARN_ON(current->softirqs_enabled); | ||
3553 | } else { | ||
3554 | /* like the above, but softirqs should be enabled here */ | ||
3555 | DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); | ||
3556 | } | ||
3557 | } | ||
3558 | |||
3559 | if (!debug_locks) | ||
3560 | print_irqtrace_events(current); | ||
3561 | #endif | ||
3562 | } | ||
3563 | |||
3564 | void lock_set_class(struct lockdep_map *lock, const char *name, | ||
3565 | struct lock_class_key *key, unsigned int subclass, | ||
3566 | unsigned long ip) | ||
3567 | { | ||
3568 | unsigned long flags; | ||
3569 | |||
3570 | if (unlikely(current->lockdep_recursion)) | ||
3571 | return; | ||
3572 | |||
3573 | raw_local_irq_save(flags); | ||
3574 | current->lockdep_recursion = 1; | ||
3575 | check_flags(flags); | ||
3576 | if (__lock_set_class(lock, name, key, subclass, ip)) | ||
3577 | check_chain_key(current); | ||
3578 | current->lockdep_recursion = 0; | ||
3579 | raw_local_irq_restore(flags); | ||
3580 | } | ||
3581 | EXPORT_SYMBOL_GPL(lock_set_class); | ||
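lock_set_class() re-annotates a lock that is already held. A wrapper that only changes the subclass while keeping the existing name and key would look roughly like the sketch below; treat the helper name my_set_subclass as illustrative (the in-tree equivalent is lock_set_subclass() in <linux/lockdep.h>):

	static inline void my_set_subclass(struct lockdep_map *lock,
					   unsigned int subclass, unsigned long ip)
	{
		/* keep name and key, only the subclass changes */
		lock_set_class(lock, lock->name, lock->key, subclass, ip);
	}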
3582 | |||
3583 | /* | ||
3584 | * We are not always called with irqs disabled - do that here, | ||
3585 | * and also avoid lockdep recursion: | ||
3586 | */ | ||
3587 | void lock_acquire(struct lockdep_map *lock, unsigned int subclass, | ||
3588 | int trylock, int read, int check, | ||
3589 | struct lockdep_map *nest_lock, unsigned long ip) | ||
3590 | { | ||
3591 | unsigned long flags; | ||
3592 | |||
3593 | if (unlikely(current->lockdep_recursion)) | ||
3594 | return; | ||
3595 | |||
3596 | raw_local_irq_save(flags); | ||
3597 | check_flags(flags); | ||
3598 | |||
3599 | current->lockdep_recursion = 1; | ||
3600 | trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); | ||
3601 | __lock_acquire(lock, subclass, trylock, read, check, | ||
3602 | irqs_disabled_flags(flags), nest_lock, ip, 0); | ||
3603 | current->lockdep_recursion = 0; | ||
3604 | raw_local_irq_restore(flags); | ||
3605 | } | ||
3606 | EXPORT_SYMBOL_GPL(lock_acquire); | ||
3607 | |||
3608 | void lock_release(struct lockdep_map *lock, int nested, | ||
3609 | unsigned long ip) | ||
3610 | { | ||
3611 | unsigned long flags; | ||
3612 | |||
3613 | if (unlikely(current->lockdep_recursion)) | ||
3614 | return; | ||
3615 | |||
3616 | raw_local_irq_save(flags); | ||
3617 | check_flags(flags); | ||
3618 | current->lockdep_recursion = 1; | ||
3619 | trace_lock_release(lock, ip); | ||
3620 | __lock_release(lock, nested, ip); | ||
3621 | current->lockdep_recursion = 0; | ||
3622 | raw_local_irq_restore(flags); | ||
3623 | } | ||
3624 | EXPORT_SYMBOL_GPL(lock_release); | ||
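lock_acquire() and lock_release() are the two entry points that every locking primitive's annotation reduces to. The sketch below shows only the calling pattern; my_raw_lock, arch_take_lock() and arch_drop_lock() are hypothetical, and check=2 requests the full validation (including mark_irqflags()) performed in __lock_acquire():

	struct my_raw_lock {
		struct lockdep_map	dep_map;
		/* ... the real lock word ... */
	};

	static inline void my_lock(struct my_raw_lock *l)
	{
		/* subclass=0, trylock=0, read=0, check=2 (full), no nest_lock */
		lock_acquire(&l->dep_map, 0, 0, 0, 2, NULL, _RET_IP_);
		arch_take_lock(l);			/* hypothetical low-level op */
	}

	static inline void my_unlock(struct my_raw_lock *l)
	{
		lock_release(&l->dep_map, 1, _RET_IP_);	/* nested (in-order) unlock */
		arch_drop_lock(l);			/* hypothetical low-level op */
	}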
3625 | |||
3626 | int lock_is_held(struct lockdep_map *lock) | ||
3627 | { | ||
3628 | unsigned long flags; | ||
3629 | int ret = 0; | ||
3630 | |||
3631 | if (unlikely(current->lockdep_recursion)) | ||
3632 | return 1; /* avoid false negative lockdep_assert_held() */ | ||
3633 | |||
3634 | raw_local_irq_save(flags); | ||
3635 | check_flags(flags); | ||
3636 | |||
3637 | current->lockdep_recursion = 1; | ||
3638 | ret = __lock_is_held(lock); | ||
3639 | current->lockdep_recursion = 0; | ||
3640 | raw_local_irq_restore(flags); | ||
3641 | |||
3642 | return ret; | ||
3643 | } | ||
3644 | EXPORT_SYMBOL_GPL(lock_is_held); | ||
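lock_is_held() is mostly consumed through assertion macros rather than called directly. A minimal sketch of such an assertion, assuming the dep_map-embedding convention used above (the in-tree helper of this shape is lockdep_assert_held() in <linux/lockdep.h>):

	/* warn if the lock protecting some data is not currently held */
	#define my_assert_held(l)					\
		WARN_ON(debug_locks && !lock_is_held(&(l)->dep_map))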
3645 | |||
3646 | void lockdep_set_current_reclaim_state(gfp_t gfp_mask) | ||
3647 | { | ||
3648 | current->lockdep_reclaim_gfp = gfp_mask; | ||
3649 | } | ||
3650 | |||
3651 | void lockdep_clear_current_reclaim_state(void) | ||
3652 | { | ||
3653 | current->lockdep_reclaim_gfp = 0; | ||
3654 | } | ||
3655 | |||
3656 | #ifdef CONFIG_LOCK_STAT | ||
3657 | static int | ||
3658 | print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, | ||
3659 | unsigned long ip) | ||
3660 | { | ||
3661 | if (!debug_locks_off()) | ||
3662 | return 0; | ||
3663 | if (debug_locks_silent) | ||
3664 | return 0; | ||
3665 | |||
3666 | printk("\n"); | ||
3667 | printk("=================================\n"); | ||
3668 | printk("[ BUG: bad contention detected! ]\n"); | ||
3669 | print_kernel_ident(); | ||
3670 | printk("---------------------------------\n"); | ||
3671 | printk("%s/%d is trying to contend lock (", | ||
3672 | curr->comm, task_pid_nr(curr)); | ||
3673 | print_lockdep_cache(lock); | ||
3674 | printk(") at:\n"); | ||
3675 | print_ip_sym(ip); | ||
3676 | printk("but there are no locks held!\n"); | ||
3677 | printk("\nother info that might help us debug this:\n"); | ||
3678 | lockdep_print_held_locks(curr); | ||
3679 | |||
3680 | printk("\nstack backtrace:\n"); | ||
3681 | dump_stack(); | ||
3682 | |||
3683 | return 0; | ||
3684 | } | ||
3685 | |||
3686 | static void | ||
3687 | __lock_contended(struct lockdep_map *lock, unsigned long ip) | ||
3688 | { | ||
3689 | struct task_struct *curr = current; | ||
3690 | struct held_lock *hlock, *prev_hlock; | ||
3691 | struct lock_class_stats *stats; | ||
3692 | unsigned int depth; | ||
3693 | int i, contention_point, contending_point; | ||
3694 | |||
3695 | depth = curr->lockdep_depth; | ||
3696 | /* | ||
3697 | * Whee, we contended on this lock, except it seems we're not | ||
3698 | * actually trying to acquire anything much at all.. | ||
3699 | */ | ||
3700 | if (DEBUG_LOCKS_WARN_ON(!depth)) | ||
3701 | return; | ||
3702 | |||
3703 | prev_hlock = NULL; | ||
3704 | for (i = depth-1; i >= 0; i--) { | ||
3705 | hlock = curr->held_locks + i; | ||
3706 | /* | ||
3707 | * We must not cross into another context: | ||
3708 | */ | ||
3709 | if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) | ||
3710 | break; | ||
3711 | if (match_held_lock(hlock, lock)) | ||
3712 | goto found_it; | ||
3713 | prev_hlock = hlock; | ||
3714 | } | ||
3715 | print_lock_contention_bug(curr, lock, ip); | ||
3716 | return; | ||
3717 | |||
3718 | found_it: | ||
3719 | if (hlock->instance != lock) | ||
3720 | return; | ||
3721 | |||
3722 | hlock->waittime_stamp = lockstat_clock(); | ||
3723 | |||
3724 | contention_point = lock_point(hlock_class(hlock)->contention_point, ip); | ||
3725 | contending_point = lock_point(hlock_class(hlock)->contending_point, | ||
3726 | lock->ip); | ||
3727 | |||
3728 | stats = get_lock_stats(hlock_class(hlock)); | ||
3729 | if (contention_point < LOCKSTAT_POINTS) | ||
3730 | stats->contention_point[contention_point]++; | ||
3731 | if (contending_point < LOCKSTAT_POINTS) | ||
3732 | stats->contending_point[contending_point]++; | ||
3733 | if (lock->cpu != smp_processor_id()) | ||
3734 | stats->bounces[bounce_contended + !!hlock->read]++; | ||
3735 | put_lock_stats(stats); | ||
3736 | } | ||
3737 | |||
3738 | static void | ||
3739 | __lock_acquired(struct lockdep_map *lock, unsigned long ip) | ||
3740 | { | ||
3741 | struct task_struct *curr = current; | ||
3742 | struct held_lock *hlock, *prev_hlock; | ||
3743 | struct lock_class_stats *stats; | ||
3744 | unsigned int depth; | ||
3745 | u64 now, waittime = 0; | ||
3746 | int i, cpu; | ||
3747 | |||
3748 | depth = curr->lockdep_depth; | ||
3749 | /* | ||
3750 | * Yay, we acquired ownership of this lock we didn't try to | ||
3751 | * acquire, how the heck did that happen? | ||
3752 | */ | ||
3753 | if (DEBUG_LOCKS_WARN_ON(!depth)) | ||
3754 | return; | ||
3755 | |||
3756 | prev_hlock = NULL; | ||
3757 | for (i = depth-1; i >= 0; i--) { | ||
3758 | hlock = curr->held_locks + i; | ||
3759 | /* | ||
3760 | * We must not cross into another context: | ||
3761 | */ | ||
3762 | if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) | ||
3763 | break; | ||
3764 | if (match_held_lock(hlock, lock)) | ||
3765 | goto found_it; | ||
3766 | prev_hlock = hlock; | ||
3767 | } | ||
3768 | print_lock_contention_bug(curr, lock, _RET_IP_); | ||
3769 | return; | ||
3770 | |||
3771 | found_it: | ||
3772 | if (hlock->instance != lock) | ||
3773 | return; | ||
3774 | |||
3775 | cpu = smp_processor_id(); | ||
3776 | if (hlock->waittime_stamp) { | ||
3777 | now = lockstat_clock(); | ||
3778 | waittime = now - hlock->waittime_stamp; | ||
3779 | hlock->holdtime_stamp = now; | ||
3780 | } | ||
3781 | |||
3782 | trace_lock_acquired(lock, ip); | ||
3783 | |||
3784 | stats = get_lock_stats(hlock_class(hlock)); | ||
3785 | if (waittime) { | ||
3786 | if (hlock->read) | ||
3787 | lock_time_inc(&stats->read_waittime, waittime); | ||
3788 | else | ||
3789 | lock_time_inc(&stats->write_waittime, waittime); | ||
3790 | } | ||
3791 | if (lock->cpu != cpu) | ||
3792 | stats->bounces[bounce_acquired + !!hlock->read]++; | ||
3793 | put_lock_stats(stats); | ||
3794 | |||
3795 | lock->cpu = cpu; | ||
3796 | lock->ip = ip; | ||
3797 | } | ||
3798 | |||
3799 | void lock_contended(struct lockdep_map *lock, unsigned long ip) | ||
3800 | { | ||
3801 | unsigned long flags; | ||
3802 | |||
3803 | if (unlikely(!lock_stat)) | ||
3804 | return; | ||
3805 | |||
3806 | if (unlikely(current->lockdep_recursion)) | ||
3807 | return; | ||
3808 | |||
3809 | raw_local_irq_save(flags); | ||
3810 | check_flags(flags); | ||
3811 | current->lockdep_recursion = 1; | ||
3812 | trace_lock_contended(lock, ip); | ||
3813 | __lock_contended(lock, ip); | ||
3814 | current->lockdep_recursion = 0; | ||
3815 | raw_local_irq_restore(flags); | ||
3816 | } | ||
3817 | EXPORT_SYMBOL_GPL(lock_contended); | ||
3818 | |||
3819 | void lock_acquired(struct lockdep_map *lock, unsigned long ip) | ||
3820 | { | ||
3821 | unsigned long flags; | ||
3822 | |||
3823 | if (unlikely(!lock_stat)) | ||
3824 | return; | ||
3825 | |||
3826 | if (unlikely(current->lockdep_recursion)) | ||
3827 | return; | ||
3828 | |||
3829 | raw_local_irq_save(flags); | ||
3830 | check_flags(flags); | ||
3831 | current->lockdep_recursion = 1; | ||
3832 | __lock_acquired(lock, ip); | ||
3833 | current->lockdep_recursion = 0; | ||
3834 | raw_local_irq_restore(flags); | ||
3835 | } | ||
3836 | EXPORT_SYMBOL_GPL(lock_acquired); | ||
3837 | #endif | ||
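lock_contended() and lock_acquired() are meant to bracket a contended acquisition: the former is called when the fast path fails and the task is about to wait, the latter once the lock is finally taken, which is what lets __lock_acquired() compute the wait time. A hedged sketch of that pattern, where my_lock_slowpath() and wait_for_lock() are hypothetical:

	static void my_lock_slowpath(struct my_raw_lock *l)
	{
		lock_contended(&l->dep_map, _RET_IP_);	/* record the contention point */
		wait_for_lock(l);			/* hypothetical blocking wait */
		lock_acquired(&l->dep_map, _RET_IP_);	/* close the wait-time window */
	}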
3838 | |||
3839 | /* | ||
3840 | * Used by the testsuite, sanitize the validator state | ||
3841 | * after a simulated failure: | ||
3842 | */ | ||
3843 | |||
3844 | void lockdep_reset(void) | ||
3845 | { | ||
3846 | unsigned long flags; | ||
3847 | int i; | ||
3848 | |||
3849 | raw_local_irq_save(flags); | ||
3850 | current->curr_chain_key = 0; | ||
3851 | current->lockdep_depth = 0; | ||
3852 | current->lockdep_recursion = 0; | ||
3853 | memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock)); | ||
3854 | nr_hardirq_chains = 0; | ||
3855 | nr_softirq_chains = 0; | ||
3856 | nr_process_chains = 0; | ||
3857 | debug_locks = 1; | ||
3858 | for (i = 0; i < CHAINHASH_SIZE; i++) | ||
3859 | INIT_LIST_HEAD(chainhash_table + i); | ||
3860 | raw_local_irq_restore(flags); | ||
3861 | } | ||
3862 | |||
3863 | static void zap_class(struct lock_class *class) | ||
3864 | { | ||
3865 | int i; | ||
3866 | |||
3867 | /* | ||
3868 | * Remove all dependencies this lock is | ||
3869 | * involved in: | ||
3870 | */ | ||
3871 | for (i = 0; i < nr_list_entries; i++) { | ||
3872 | if (list_entries[i].class == class) | ||
3873 | list_del_rcu(&list_entries[i].entry); | ||
3874 | } | ||
3875 | /* | ||
3876 | * Unhash the class and remove it from the all_lock_classes list: | ||
3877 | */ | ||
3878 | list_del_rcu(&class->hash_entry); | ||
3879 | list_del_rcu(&class->lock_entry); | ||
3880 | |||
3881 | class->key = NULL; | ||
3882 | } | ||
3883 | |||
3884 | static inline int within(const void *addr, void *start, unsigned long size) | ||
3885 | { | ||
3886 | return addr >= start && addr < start + size; | ||
3887 | } | ||
3888 | |||
3889 | void lockdep_free_key_range(void *start, unsigned long size) | ||
3890 | { | ||
3891 | struct lock_class *class, *next; | ||
3892 | struct list_head *head; | ||
3893 | unsigned long flags; | ||
3894 | int i; | ||
3895 | int locked; | ||
3896 | |||
3897 | raw_local_irq_save(flags); | ||
3898 | locked = graph_lock(); | ||
3899 | |||
3900 | /* | ||
3901 | * Unhash all classes that were created by this module: | ||
3902 | */ | ||
3903 | for (i = 0; i < CLASSHASH_SIZE; i++) { | ||
3904 | head = classhash_table + i; | ||
3905 | if (list_empty(head)) | ||
3906 | continue; | ||
3907 | list_for_each_entry_safe(class, next, head, hash_entry) { | ||
3908 | if (within(class->key, start, size)) | ||
3909 | zap_class(class); | ||
3910 | else if (within(class->name, start, size)) | ||
3911 | zap_class(class); | ||
3912 | } | ||
3913 | } | ||
3914 | |||
3915 | if (locked) | ||
3916 | graph_unlock(); | ||
3917 | raw_local_irq_restore(flags); | ||
3918 | } | ||
3919 | |||
3920 | void lockdep_reset_lock(struct lockdep_map *lock) | ||
3921 | { | ||
3922 | struct lock_class *class, *next; | ||
3923 | struct list_head *head; | ||
3924 | unsigned long flags; | ||
3925 | int i, j; | ||
3926 | int locked; | ||
3927 | |||
3928 | raw_local_irq_save(flags); | ||
3929 | |||
3930 | /* | ||
3931 | * Remove all classes this lock might have: | ||
3932 | */ | ||
3933 | for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { | ||
3934 | /* | ||
3935 | * If the class exists we look it up and zap it: | ||
3936 | */ | ||
3937 | class = look_up_lock_class(lock, j); | ||
3938 | if (class) | ||
3939 | zap_class(class); | ||
3940 | } | ||
3941 | /* | ||
3942 | * Debug check: in the end all mapped classes should | ||
3943 | * be gone. | ||
3944 | */ | ||
3945 | locked = graph_lock(); | ||
3946 | for (i = 0; i < CLASSHASH_SIZE; i++) { | ||
3947 | head = classhash_table + i; | ||
3948 | if (list_empty(head)) | ||
3949 | continue; | ||
3950 | list_for_each_entry_safe(class, next, head, hash_entry) { | ||
3951 | int match = 0; | ||
3952 | |||
3953 | for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) | ||
3954 | match |= class == lock->class_cache[j]; | ||
3955 | |||
3956 | if (unlikely(match)) { | ||
3957 | if (debug_locks_off_graph_unlock()) { | ||
3958 | /* | ||
3959 | * We all just reset everything, how did it match? | ||
3960 | */ | ||
3961 | WARN_ON(1); | ||
3962 | } | ||
3963 | goto out_restore; | ||
3964 | } | ||
3965 | } | ||
3966 | } | ||
3967 | if (locked) | ||
3968 | graph_unlock(); | ||
3969 | |||
3970 | out_restore: | ||
3971 | raw_local_irq_restore(flags); | ||
3972 | } | ||
3973 | |||
3974 | void lockdep_init(void) | ||
3975 | { | ||
3976 | int i; | ||
3977 | |||
3978 | /* | ||
3979 | * Some architectures have their own start_kernel() | ||
3980 | * code which calls lockdep_init(), while we also | ||
3981 | * call lockdep_init() from start_kernel() itself, | ||
3982 | * and we want to initialize the hashes only once: | ||
3983 | */ | ||
3984 | if (lockdep_initialized) | ||
3985 | return; | ||
3986 | |||
3987 | for (i = 0; i < CLASSHASH_SIZE; i++) | ||
3988 | INIT_LIST_HEAD(classhash_table + i); | ||
3989 | |||
3990 | for (i = 0; i < CHAINHASH_SIZE; i++) | ||
3991 | INIT_LIST_HEAD(chainhash_table + i); | ||
3992 | |||
3993 | lockdep_initialized = 1; | ||
3994 | } | ||
3995 | |||
3996 | void __init lockdep_info(void) | ||
3997 | { | ||
3998 | printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); | ||
3999 | |||
4000 | printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); | ||
4001 | printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); | ||
4002 | printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); | ||
4003 | printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); | ||
4004 | printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); | ||
4005 | printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); | ||
4006 | printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); | ||
4007 | |||
4008 | printk(" memory used by lock dependency info: %lu kB\n", | ||
4009 | (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS + | ||
4010 | sizeof(struct list_head) * CLASSHASH_SIZE + | ||
4011 | sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES + | ||
4012 | sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS + | ||
4013 | sizeof(struct list_head) * CHAINHASH_SIZE | ||
4014 | #ifdef CONFIG_PROVE_LOCKING | ||
4015 | + sizeof(struct circular_queue) | ||
4016 | #endif | ||
4017 | ) / 1024 | ||
4018 | ); | ||
4019 | |||
4020 | printk(" per task-struct memory footprint: %lu bytes\n", | ||
4021 | sizeof(struct held_lock) * MAX_LOCK_DEPTH); | ||
4022 | |||
4023 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
4024 | if (lockdep_init_error) { | ||
4025 | printk("WARNING: lockdep init error! lock-%s was acquired" | ||
4026 | "before lockdep_init\n", lock_init_error); | ||
4027 | printk("Call stack leading to lockdep invocation was:\n"); | ||
4028 | print_stack_trace(&lockdep_init_trace, 0); | ||
4029 | } | ||
4030 | #endif | ||
4031 | } | ||
4032 | |||
4033 | static void | ||
4034 | print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | ||
4035 | const void *mem_to, struct held_lock *hlock) | ||
4036 | { | ||
4037 | if (!debug_locks_off()) | ||
4038 | return; | ||
4039 | if (debug_locks_silent) | ||
4040 | return; | ||
4041 | |||
4042 | printk("\n"); | ||
4043 | printk("=========================\n"); | ||
4044 | printk("[ BUG: held lock freed! ]\n"); | ||
4045 | print_kernel_ident(); | ||
4046 | printk("-------------------------\n"); | ||
4047 | printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", | ||
4048 | curr->comm, task_pid_nr(curr), mem_from, mem_to-1); | ||
4049 | print_lock(hlock); | ||
4050 | lockdep_print_held_locks(curr); | ||
4051 | |||
4052 | printk("\nstack backtrace:\n"); | ||
4053 | dump_stack(); | ||
4054 | } | ||
4055 | |||
4056 | static inline int not_in_range(const void* mem_from, unsigned long mem_len, | ||
4057 | const void* lock_from, unsigned long lock_len) | ||
4058 | { | ||
4059 | return lock_from + lock_len <= mem_from || | ||
4060 | mem_from + mem_len <= lock_from; | ||
4061 | } | ||
4062 | |||
4063 | /* | ||
4064 | * Called when kernel memory is freed (or unmapped), or if a lock | ||
4065 | * is destroyed or reinitialized - this code checks whether there is | ||
4066 | * any held lock in the memory range of <from> to <to>: | ||
4067 | */ | ||
4068 | void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) | ||
4069 | { | ||
4070 | struct task_struct *curr = current; | ||
4071 | struct held_lock *hlock; | ||
4072 | unsigned long flags; | ||
4073 | int i; | ||
4074 | |||
4075 | if (unlikely(!debug_locks)) | ||
4076 | return; | ||
4077 | |||
4078 | local_irq_save(flags); | ||
4079 | for (i = 0; i < curr->lockdep_depth; i++) { | ||
4080 | hlock = curr->held_locks + i; | ||
4081 | |||
4082 | if (not_in_range(mem_from, mem_len, hlock->instance, | ||
4083 | sizeof(*hlock->instance))) | ||
4084 | continue; | ||
4085 | |||
4086 | print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock); | ||
4087 | break; | ||
4088 | } | ||
4089 | local_irq_restore(flags); | ||
4090 | } | ||
4091 | EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); | ||
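The natural callers of debug_check_no_locks_freed() are free paths that hand memory back to an allocator. The sketch below only illustrates the calling convention; my_free() and release_to_allocator() are hypothetical:

	static void my_free(void *obj, unsigned long size)
	{
		/* complain if any currently held lock lives inside [obj, obj + size) */
		debug_check_no_locks_freed(obj, size);
		release_to_allocator(obj, size);	/* hypothetical */
	}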
4092 | |||
4093 | static void print_held_locks_bug(void) | ||
4094 | { | ||
4095 | if (!debug_locks_off()) | ||
4096 | return; | ||
4097 | if (debug_locks_silent) | ||
4098 | return; | ||
4099 | |||
4100 | printk("\n"); | ||
4101 | printk("=====================================\n"); | ||
4102 | printk("[ BUG: %s/%d still has locks held! ]\n", | ||
4103 | current->comm, task_pid_nr(current)); | ||
4104 | print_kernel_ident(); | ||
4105 | printk("-------------------------------------\n"); | ||
4106 | lockdep_print_held_locks(current); | ||
4107 | printk("\nstack backtrace:\n"); | ||
4108 | dump_stack(); | ||
4109 | } | ||
4110 | |||
4111 | void debug_check_no_locks_held(void) | ||
4112 | { | ||
4113 | if (unlikely(current->lockdep_depth > 0)) | ||
4114 | print_held_locks_bug(); | ||
4115 | } | ||
4116 | EXPORT_SYMBOL_GPL(debug_check_no_locks_held); | ||
4117 | |||
4118 | void debug_show_all_locks(void) | ||
4119 | { | ||
4120 | struct task_struct *g, *p; | ||
4121 | int count = 10; | ||
4122 | int unlock = 1; | ||
4123 | |||
4124 | if (unlikely(!debug_locks)) { | ||
4125 | printk("INFO: lockdep is turned off.\n"); | ||
4126 | return; | ||
4127 | } | ||
4128 | printk("\nShowing all locks held in the system:\n"); | ||
4129 | |||
4130 | /* | ||
4131 | * Here we try to get the tasklist_lock as hard as possible, | ||
4132 | * if not successful after 2 seconds we ignore it (but keep | ||
4133 | * trying). This is to enable a debug printout even if a | ||
4134 | * tasklist_lock-holding task deadlocks or crashes. | ||
4135 | */ | ||
4136 | retry: | ||
4137 | if (!read_trylock(&tasklist_lock)) { | ||
4138 | if (count == 10) | ||
4139 | printk("hm, tasklist_lock locked, retrying... "); | ||
4140 | if (count) { | ||
4141 | count--; | ||
4142 | printk(" #%d", 10-count); | ||
4143 | mdelay(200); | ||
4144 | goto retry; | ||
4145 | } | ||
4146 | printk(" ignoring it.\n"); | ||
4147 | unlock = 0; | ||
4148 | } else { | ||
4149 | if (count != 10) | ||
4150 | printk(KERN_CONT " locked it.\n"); | ||
4151 | } | ||
4152 | |||
4153 | do_each_thread(g, p) { | ||
4154 | /* | ||
4155 | * It's not reliable to print a task's held locks | ||
4156 | * if it's not sleeping (or if it's not the current | ||
4157 | * task): | ||
4158 | */ | ||
4159 | if (p->state == TASK_RUNNING && p != current) | ||
4160 | continue; | ||
4161 | if (p->lockdep_depth) | ||
4162 | lockdep_print_held_locks(p); | ||
4163 | if (!unlock) | ||
4164 | if (read_trylock(&tasklist_lock)) | ||
4165 | unlock = 1; | ||
4166 | } while_each_thread(g, p); | ||
4167 | |||
4168 | printk("\n"); | ||
4169 | printk("=============================================\n\n"); | ||
4170 | |||
4171 | if (unlock) | ||
4172 | read_unlock(&tasklist_lock); | ||
4173 | } | ||
4174 | EXPORT_SYMBOL_GPL(debug_show_all_locks); | ||
4175 | |||
4176 | /* | ||
4177 | * Careful: only use this function if you are sure that | ||
4178 | * the task cannot run in parallel! | ||
4179 | */ | ||
4180 | void debug_show_held_locks(struct task_struct *task) | ||
4181 | { | ||
4182 | if (unlikely(!debug_locks)) { | ||
4183 | printk("INFO: lockdep is turned off.\n"); | ||
4184 | return; | ||
4185 | } | ||
4186 | lockdep_print_held_locks(task); | ||
4187 | } | ||
4188 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | ||
4189 | |||
4190 | void lockdep_sys_exit(void) | ||
4191 | { | ||
4192 | struct task_struct *curr = current; | ||
4193 | |||
4194 | if (unlikely(curr->lockdep_depth)) { | ||
4195 | if (!debug_locks_off()) | ||
4196 | return; | ||
4197 | printk("\n"); | ||
4198 | printk("================================================\n"); | ||
4199 | printk("[ BUG: lock held when returning to user space! ]\n"); | ||
4200 | print_kernel_ident(); | ||
4201 | printk("------------------------------------------------\n"); | ||
4202 | printk("%s/%d is leaving the kernel with locks still held!\n", | ||
4203 | curr->comm, curr->pid); | ||
4204 | lockdep_print_held_locks(curr); | ||
4205 | } | ||
4206 | } | ||
4207 | |||
4208 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s) | ||
4209 | { | ||
4210 | struct task_struct *curr = current; | ||
4211 | |||
4212 | #ifndef CONFIG_PROVE_RCU_REPEATEDLY | ||
4213 | if (!debug_locks_off()) | ||
4214 | return; | ||
4215 | #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ | ||
4216 | /* Note: the following can be executed concurrently, so be careful. */ | ||
4217 | printk("\n"); | ||
4218 | printk("===============================\n"); | ||
4219 | printk("[ INFO: suspicious RCU usage. ]\n"); | ||
4220 | print_kernel_ident(); | ||
4221 | printk("-------------------------------\n"); | ||
4222 | printk("%s:%d %s!\n", file, line, s); | ||
4223 | printk("\nother info that might help us debug this:\n\n"); | ||
4224 | printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", | ||
4225 | !rcu_lockdep_current_cpu_online() | ||
4226 | ? "RCU used illegally from offline CPU!\n" | ||
4227 | : !rcu_is_watching() | ||
4228 | ? "RCU used illegally from idle CPU!\n" | ||
4229 | : "", | ||
4230 | rcu_scheduler_active, debug_locks); | ||
4231 | |||
4232 | /* | ||
4233 | * If a CPU is in the RCU-free window in idle (i.e. in the section | ||
4234 | * between rcu_idle_enter() and rcu_idle_exit()), then RCU | ||
4235 | * considers that CPU to be in an "extended quiescent state", | ||
4236 | * which means that RCU will be completely ignoring that CPU. | ||
4237 | * Therefore, rcu_read_lock() and friends have absolutely no | ||
4238 | * effect on a CPU running in that state. In other words, even if | ||
4239 | * such an RCU-idle CPU has called rcu_read_lock(), RCU might well | ||
4240 | * delete data structures out from under it. RCU really has no | ||
4241 | * choice here: we need to keep an RCU-free window in idle where | ||
4242 | * the CPU may possibly enter into low power mode. This way we can | ||
4243 | * notice an extended quiescent state to other CPUs that started a grace | ||
4244 | * period. Otherwise we would delay any grace period as long as we run | ||
4245 | * in the idle task. | ||
4246 | * | ||
4247 | * So complain bitterly if someone does call rcu_read_lock(), | ||
4248 | * rcu_read_lock_bh() and so on from extended quiescent states. | ||
4249 | */ | ||
4250 | if (!rcu_is_watching()) | ||
4251 | printk("RCU used illegally from extended quiescent state!\n"); | ||
4252 | |||
4253 | lockdep_print_held_locks(curr); | ||
4254 | printk("\nstack backtrace:\n"); | ||
4255 | dump_stack(); | ||
4256 | } | ||
4257 | EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); | ||
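lockdep_rcu_suspicious() is the reporting backend for the RCU debug checks: a condition that should hold at the point of use (for example "inside an RCU read-side critical section, or holding the update-side lock") is evaluated, and a failure is routed here with the file and line of the offending access. A rough sketch of such a check, with my_rcu_check() as a purely illustrative stand-in for the in-tree rcu_dereference_check() machinery:

	#define my_rcu_check(cond)						\
	do {									\
		if (debug_locks && !(cond))					\
			lockdep_rcu_suspicious(__FILE__, __LINE__,		\
					       "suspicious RCU usage");		\
	} while (0)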
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h new file mode 100644 index 000000000000..4f560cfedc8f --- /dev/null +++ b/kernel/locking/lockdep_internals.h | |||
@@ -0,0 +1,170 @@ | |||
1 | /* | ||
2 | * kernel/lockdep_internals.h | ||
3 | * | ||
4 | * Runtime locking correctness validator | ||
5 | * | ||
6 | * lockdep subsystem internal functions and variables. | ||
7 | */ | ||
8 | |||
9 | /* | ||
10 | * Lock-class usage-state bits: | ||
11 | */ | ||
12 | enum lock_usage_bit { | ||
13 | #define LOCKDEP_STATE(__STATE) \ | ||
14 | LOCK_USED_IN_##__STATE, \ | ||
15 | LOCK_USED_IN_##__STATE##_READ, \ | ||
16 | LOCK_ENABLED_##__STATE, \ | ||
17 | LOCK_ENABLED_##__STATE##_READ, | ||
18 | #include "lockdep_states.h" | ||
19 | #undef LOCKDEP_STATE | ||
20 | LOCK_USED, | ||
21 | LOCK_USAGE_STATES | ||
22 | }; | ||
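To make the x-macro concrete: assuming HARDIRQ is the first state listed in lockdep_states.h, the enum above begins with the four bits shown below, repeats the same pattern for each further state, and ends with LOCK_USED and LOCK_USAGE_STATES. The hand-expanded form here carries an _EXAMPLE suffix only to avoid clashing with the real definition:

	enum lock_usage_bit_example {
		LOCK_USED_IN_HARDIRQ_EXAMPLE,
		LOCK_USED_IN_HARDIRQ_READ_EXAMPLE,
		LOCK_ENABLED_HARDIRQ_EXAMPLE,
		LOCK_ENABLED_HARDIRQ_READ_EXAMPLE,
		/* ...the same four bits for each further state... */
		LOCK_USED_EXAMPLE,
		LOCK_USAGE_STATES_EXAMPLE
	};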
23 | |||
24 | /* | ||
25 | * Usage-state bitmasks: | ||
26 | */ | ||
27 | #define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE), | ||
28 | |||
29 | enum { | ||
30 | #define LOCKDEP_STATE(__STATE) \ | ||
31 | __LOCKF(USED_IN_##__STATE) \ | ||
32 | __LOCKF(USED_IN_##__STATE##_READ) \ | ||
33 | __LOCKF(ENABLED_##__STATE) \ | ||
34 | __LOCKF(ENABLED_##__STATE##_READ) | ||
35 | #include "lockdep_states.h" | ||
36 | #undef LOCKDEP_STATE | ||
37 | __LOCKF(USED) | ||
38 | }; | ||
39 | |||
40 | #define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ) | ||
41 | #define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) | ||
42 | |||
43 | #define LOCKF_ENABLED_IRQ_READ \ | ||
44 | (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ) | ||
45 | #define LOCKF_USED_IN_IRQ_READ \ | ||
46 | (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) | ||
47 | |||
48 | /* | ||
49 | * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies | ||
50 | * we track. | ||
51 | * | ||
52 | * We use the per-lock dependency maps in two ways: we grow them by adding | ||
53 | * every to-be-taken lock to each currently held lock's own dependency | ||
54 | * table (if it's not there yet), and we check them for lock order | ||
55 | * conflicts and deadlocks. | ||
56 | */ | ||
57 | #define MAX_LOCKDEP_ENTRIES 16384UL | ||
58 | |||
59 | #define MAX_LOCKDEP_CHAINS_BITS 15 | ||
60 | #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) | ||
61 | |||
62 | #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) | ||
63 | |||
64 | /* | ||
65 | * Stack-trace: tightly packed array of stack backtrace | ||
66 | * addresses. Protected by the hash_lock. | ||
67 | */ | ||
68 | #define MAX_STACK_TRACE_ENTRIES 262144UL | ||
69 | |||
70 | extern struct list_head all_lock_classes; | ||
71 | extern struct lock_chain lock_chains[]; | ||
72 | |||
73 | #define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2) | ||
74 | |||
75 | extern void get_usage_chars(struct lock_class *class, | ||
76 | char usage[LOCK_USAGE_CHARS]); | ||
77 | |||
78 | extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); | ||
79 | |||
80 | struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i); | ||
81 | |||
82 | extern unsigned long nr_lock_classes; | ||
83 | extern unsigned long nr_list_entries; | ||
84 | extern unsigned long nr_lock_chains; | ||
85 | extern int nr_chain_hlocks; | ||
86 | extern unsigned long nr_stack_trace_entries; | ||
87 | |||
88 | extern unsigned int nr_hardirq_chains; | ||
89 | extern unsigned int nr_softirq_chains; | ||
90 | extern unsigned int nr_process_chains; | ||
91 | extern unsigned int max_lockdep_depth; | ||
92 | extern unsigned int max_recursion_depth; | ||
93 | |||
94 | extern unsigned int max_bfs_queue_depth; | ||
95 | |||
96 | #ifdef CONFIG_PROVE_LOCKING | ||
97 | extern unsigned long lockdep_count_forward_deps(struct lock_class *); | ||
98 | extern unsigned long lockdep_count_backward_deps(struct lock_class *); | ||
99 | #else | ||
100 | static inline unsigned long | ||
101 | lockdep_count_forward_deps(struct lock_class *class) | ||
102 | { | ||
103 | return 0; | ||
104 | } | ||
105 | static inline unsigned long | ||
106 | lockdep_count_backward_deps(struct lock_class *class) | ||
107 | { | ||
108 | return 0; | ||
109 | } | ||
110 | #endif | ||
111 | |||
112 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
113 | |||
114 | #include <asm/local.h> | ||
115 | /* | ||
116 | * Various lockdep statistics. | ||
117 | * We want them per cpu as they are often accessed in fast path | ||
118 | * and we want to avoid too much cache bouncing. | ||
119 | */ | ||
120 | struct lockdep_stats { | ||
121 | int chain_lookup_hits; | ||
122 | int chain_lookup_misses; | ||
123 | int hardirqs_on_events; | ||
124 | int hardirqs_off_events; | ||
125 | int redundant_hardirqs_on; | ||
126 | int redundant_hardirqs_off; | ||
127 | int softirqs_on_events; | ||
128 | int softirqs_off_events; | ||
129 | int redundant_softirqs_on; | ||
130 | int redundant_softirqs_off; | ||
131 | int nr_unused_locks; | ||
132 | int nr_cyclic_checks; | ||
133 | int nr_cyclic_check_recursions; | ||
134 | int nr_find_usage_forwards_checks; | ||
135 | int nr_find_usage_forwards_recursions; | ||
136 | int nr_find_usage_backwards_checks; | ||
137 | int nr_find_usage_backwards_recursions; | ||
138 | }; | ||
139 | |||
140 | DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); | ||
141 | |||
142 | #define __debug_atomic_inc(ptr) \ | ||
143 | this_cpu_inc(lockdep_stats.ptr); | ||
144 | |||
145 | #define debug_atomic_inc(ptr) { \ | ||
146 | WARN_ON_ONCE(!irqs_disabled()); \ | ||
147 | __this_cpu_inc(lockdep_stats.ptr); \ | ||
148 | } | ||
149 | |||
150 | #define debug_atomic_dec(ptr) { \ | ||
151 | WARN_ON_ONCE(!irqs_disabled()); \ | ||
152 | __this_cpu_dec(lockdep_stats.ptr); \ | ||
153 | } | ||
154 | |||
155 | #define debug_atomic_read(ptr) ({ \ | ||
156 | struct lockdep_stats *__cpu_lockdep_stats; \ | ||
157 | unsigned long long __total = 0; \ | ||
158 | int __cpu; \ | ||
159 | for_each_possible_cpu(__cpu) { \ | ||
160 | __cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu); \ | ||
161 | __total += __cpu_lockdep_stats->ptr; \ | ||
162 | } \ | ||
163 | __total; \ | ||
164 | }) | ||
165 | #else | ||
166 | # define __debug_atomic_inc(ptr) do { } while (0) | ||
167 | # define debug_atomic_inc(ptr) do { } while (0) | ||
168 | # define debug_atomic_dec(ptr) do { } while (0) | ||
169 | # define debug_atomic_read(ptr) 0 | ||
170 | #endif | ||
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c new file mode 100644 index 000000000000..ef43ac4bafb5 --- /dev/null +++ b/kernel/locking/lockdep_proc.c | |||
@@ -0,0 +1,683 @@ | |||
1 | /* | ||
2 | * kernel/lockdep_proc.c | ||
3 | * | ||
4 | * Runtime locking correctness validator | ||
5 | * | ||
6 | * Started by Ingo Molnar: | ||
7 | * | ||
8 | * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
9 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
10 | * | ||
11 | * Code for /proc/lockdep and /proc/lockdep_stats: | ||
12 | * | ||
13 | */ | ||
14 | #include <linux/export.h> | ||
15 | #include <linux/proc_fs.h> | ||
16 | #include <linux/seq_file.h> | ||
17 | #include <linux/kallsyms.h> | ||
18 | #include <linux/debug_locks.h> | ||
19 | #include <linux/vmalloc.h> | ||
20 | #include <linux/sort.h> | ||
21 | #include <asm/uaccess.h> | ||
22 | #include <asm/div64.h> | ||
23 | |||
24 | #include "lockdep_internals.h" | ||
25 | |||
26 | static void *l_next(struct seq_file *m, void *v, loff_t *pos) | ||
27 | { | ||
28 | return seq_list_next(v, &all_lock_classes, pos); | ||
29 | } | ||
30 | |||
31 | static void *l_start(struct seq_file *m, loff_t *pos) | ||
32 | { | ||
33 | return seq_list_start_head(&all_lock_classes, *pos); | ||
34 | } | ||
35 | |||
36 | static void l_stop(struct seq_file *m, void *v) | ||
37 | { | ||
38 | } | ||
39 | |||
40 | static void print_name(struct seq_file *m, struct lock_class *class) | ||
41 | { | ||
42 | char str[KSYM_NAME_LEN]; | ||
43 | const char *name = class->name; | ||
44 | |||
45 | if (!name) { | ||
46 | name = __get_key_name(class->key, str); | ||
47 | seq_printf(m, "%s", name); | ||
48 | } else { | ||
49 | seq_printf(m, "%s", name); | ||
50 | if (class->name_version > 1) | ||
51 | seq_printf(m, "#%d", class->name_version); | ||
52 | if (class->subclass) | ||
53 | seq_printf(m, "/%d", class->subclass); | ||
54 | } | ||
55 | } | ||
56 | |||
57 | static int l_show(struct seq_file *m, void *v) | ||
58 | { | ||
59 | struct lock_class *class = list_entry(v, struct lock_class, lock_entry); | ||
60 | struct lock_list *entry; | ||
61 | char usage[LOCK_USAGE_CHARS]; | ||
62 | |||
63 | if (v == &all_lock_classes) { | ||
64 | seq_printf(m, "all lock classes:\n"); | ||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | seq_printf(m, "%p", class->key); | ||
69 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
70 | seq_printf(m, " OPS:%8ld", class->ops); | ||
71 | #endif | ||
72 | #ifdef CONFIG_PROVE_LOCKING | ||
73 | seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class)); | ||
74 | seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class)); | ||
75 | #endif | ||
76 | |||
77 | get_usage_chars(class, usage); | ||
78 | seq_printf(m, " %s", usage); | ||
79 | |||
80 | seq_printf(m, ": "); | ||
81 | print_name(m, class); | ||
82 | seq_puts(m, "\n"); | ||
83 | |||
84 | list_for_each_entry(entry, &class->locks_after, entry) { | ||
85 | if (entry->distance == 1) { | ||
86 | seq_printf(m, " -> [%p] ", entry->class->key); | ||
87 | print_name(m, entry->class); | ||
88 | seq_puts(m, "\n"); | ||
89 | } | ||
90 | } | ||
91 | seq_puts(m, "\n"); | ||
92 | |||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | static const struct seq_operations lockdep_ops = { | ||
97 | .start = l_start, | ||
98 | .next = l_next, | ||
99 | .stop = l_stop, | ||
100 | .show = l_show, | ||
101 | }; | ||
102 | |||
103 | static int lockdep_open(struct inode *inode, struct file *file) | ||
104 | { | ||
105 | return seq_open(file, &lockdep_ops); | ||
106 | } | ||
107 | |||
108 | static const struct file_operations proc_lockdep_operations = { | ||
109 | .open = lockdep_open, | ||
110 | .read = seq_read, | ||
111 | .llseek = seq_lseek, | ||
112 | .release = seq_release, | ||
113 | }; | ||
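These file_operations are eventually hooked into procfs (the registration itself is not part of the lines shown here); a seq_file-backed entry of this kind is typically created along the lines of the sketch below, where the path name and mode are illustrative rather than quoted from the source:

	static int __init my_lockdep_proc_init(void)
	{
		/* create a read-only /proc file backed by the seq_operations above */
		proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations);
		return 0;
	}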
114 | |||
115 | #ifdef CONFIG_PROVE_LOCKING | ||
116 | static void *lc_start(struct seq_file *m, loff_t *pos) | ||
117 | { | ||
118 | if (*pos == 0) | ||
119 | return SEQ_START_TOKEN; | ||
120 | |||
121 | if (*pos - 1 < nr_lock_chains) | ||
122 | return lock_chains + (*pos - 1); | ||
123 | |||
124 | return NULL; | ||
125 | } | ||
126 | |||
127 | static void *lc_next(struct seq_file *m, void *v, loff_t *pos) | ||
128 | { | ||
129 | (*pos)++; | ||
130 | return lc_start(m, pos); | ||
131 | } | ||
132 | |||
133 | static void lc_stop(struct seq_file *m, void *v) | ||
134 | { | ||
135 | } | ||
136 | |||
137 | static int lc_show(struct seq_file *m, void *v) | ||
138 | { | ||
139 | struct lock_chain *chain = v; | ||
140 | struct lock_class *class; | ||
141 | int i; | ||
142 | |||
143 | if (v == SEQ_START_TOKEN) { | ||
144 | seq_printf(m, "all lock chains:\n"); | ||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | seq_printf(m, "irq_context: %d\n", chain->irq_context); | ||
149 | |||
150 | for (i = 0; i < chain->depth; i++) { | ||
151 | class = lock_chain_get_class(chain, i); | ||
152 | if (!class->key) | ||
153 | continue; | ||
154 | |||
155 | seq_printf(m, "[%p] ", class->key); | ||
156 | print_name(m, class); | ||
157 | seq_puts(m, "\n"); | ||
158 | } | ||
159 | seq_puts(m, "\n"); | ||
160 | |||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | static const struct seq_operations lockdep_chains_ops = { | ||
165 | .start = lc_start, | ||
166 | .next = lc_next, | ||
167 | .stop = lc_stop, | ||
168 | .show = lc_show, | ||
169 | }; | ||
170 | |||
171 | static int lockdep_chains_open(struct inode *inode, struct file *file) | ||
172 | { | ||
173 | return seq_open(file, &lockdep_chains_ops); | ||
174 | } | ||
175 | |||
176 | static const struct file_operations proc_lockdep_chains_operations = { | ||
177 | .open = lockdep_chains_open, | ||
178 | .read = seq_read, | ||
179 | .llseek = seq_lseek, | ||
180 | .release = seq_release, | ||
181 | }; | ||
182 | #endif /* CONFIG_PROVE_LOCKING */ | ||
183 | |||
184 | static void lockdep_stats_debug_show(struct seq_file *m) | ||
185 | { | ||
186 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
187 | unsigned long long hi1 = debug_atomic_read(hardirqs_on_events), | ||
188 | hi2 = debug_atomic_read(hardirqs_off_events), | ||
189 | hr1 = debug_atomic_read(redundant_hardirqs_on), | ||
190 | hr2 = debug_atomic_read(redundant_hardirqs_off), | ||
191 | si1 = debug_atomic_read(softirqs_on_events), | ||
192 | si2 = debug_atomic_read(softirqs_off_events), | ||
193 | sr1 = debug_atomic_read(redundant_softirqs_on), | ||
194 | sr2 = debug_atomic_read(redundant_softirqs_off); | ||
195 | |||
196 | seq_printf(m, " chain lookup misses: %11llu\n", | ||
197 | debug_atomic_read(chain_lookup_misses)); | ||
198 | seq_printf(m, " chain lookup hits: %11llu\n", | ||
199 | debug_atomic_read(chain_lookup_hits)); | ||
200 | seq_printf(m, " cyclic checks: %11llu\n", | ||
201 | debug_atomic_read(nr_cyclic_checks)); | ||
202 | seq_printf(m, " find-mask forwards checks: %11llu\n", | ||
203 | debug_atomic_read(nr_find_usage_forwards_checks)); | ||
204 | seq_printf(m, " find-mask backwards checks: %11llu\n", | ||
205 | debug_atomic_read(nr_find_usage_backwards_checks)); | ||
206 | |||
207 | seq_printf(m, " hardirq on events: %11llu\n", hi1); | ||
208 | seq_printf(m, " hardirq off events: %11llu\n", hi2); | ||
209 | seq_printf(m, " redundant hardirq ons: %11llu\n", hr1); | ||
210 | seq_printf(m, " redundant hardirq offs: %11llu\n", hr2); | ||
211 | seq_printf(m, " softirq on events: %11llu\n", si1); | ||
212 | seq_printf(m, " softirq off events: %11llu\n", si2); | ||
213 | seq_printf(m, " redundant softirq ons: %11llu\n", sr1); | ||
214 | seq_printf(m, " redundant softirq offs: %11llu\n", sr2); | ||
215 | #endif | ||
216 | } | ||
217 | |||
218 | static int lockdep_stats_show(struct seq_file *m, void *v) | ||
219 | { | ||
220 | struct lock_class *class; | ||
221 | unsigned long nr_unused = 0, nr_uncategorized = 0, | ||
222 | nr_irq_safe = 0, nr_irq_unsafe = 0, | ||
223 | nr_softirq_safe = 0, nr_softirq_unsafe = 0, | ||
224 | nr_hardirq_safe = 0, nr_hardirq_unsafe = 0, | ||
225 | nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, | ||
226 | nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, | ||
227 | nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, | ||
228 | sum_forward_deps = 0; | ||
229 | |||
230 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | ||
231 | |||
232 | if (class->usage_mask == 0) | ||
233 | nr_unused++; | ||
234 | if (class->usage_mask == LOCKF_USED) | ||
235 | nr_uncategorized++; | ||
236 | if (class->usage_mask & LOCKF_USED_IN_IRQ) | ||
237 | nr_irq_safe++; | ||
238 | if (class->usage_mask & LOCKF_ENABLED_IRQ) | ||
239 | nr_irq_unsafe++; | ||
240 | if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) | ||
241 | nr_softirq_safe++; | ||
242 | if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ) | ||
243 | nr_softirq_unsafe++; | ||
244 | if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) | ||
245 | nr_hardirq_safe++; | ||
246 | if (class->usage_mask & LOCKF_ENABLED_HARDIRQ) | ||
247 | nr_hardirq_unsafe++; | ||
248 | if (class->usage_mask & LOCKF_USED_IN_IRQ_READ) | ||
249 | nr_irq_read_safe++; | ||
250 | if (class->usage_mask & LOCKF_ENABLED_IRQ_READ) | ||
251 | nr_irq_read_unsafe++; | ||
252 | if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) | ||
253 | nr_softirq_read_safe++; | ||
254 | if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ) | ||
255 | nr_softirq_read_unsafe++; | ||
256 | if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) | ||
257 | nr_hardirq_read_safe++; | ||
258 | if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ) | ||
259 | nr_hardirq_read_unsafe++; | ||
260 | |||
261 | #ifdef CONFIG_PROVE_LOCKING | ||
262 | sum_forward_deps += lockdep_count_forward_deps(class); | ||
263 | #endif | ||
264 | } | ||
265 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
266 | DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused); | ||
267 | #endif | ||
268 | seq_printf(m, " lock-classes: %11lu [max: %lu]\n", | ||
269 | nr_lock_classes, MAX_LOCKDEP_KEYS); | ||
270 | seq_printf(m, " direct dependencies: %11lu [max: %lu]\n", | ||
271 | nr_list_entries, MAX_LOCKDEP_ENTRIES); | ||
272 | seq_printf(m, " indirect dependencies: %11lu\n", | ||
273 | sum_forward_deps); | ||
274 | |||
275 | /* | ||
276 | * Total number of dependencies: | ||
277 | * | ||
278 | * All irq-safe locks may nest inside irq-unsafe locks, | ||
279 | * plus all the other known dependencies: | ||
280 | */ | ||
281 | seq_printf(m, " all direct dependencies: %11lu\n", | ||
282 | nr_irq_unsafe * nr_irq_safe + | ||
283 | nr_hardirq_unsafe * nr_hardirq_safe + | ||
284 | nr_list_entries); | ||
285 | |||
286 | #ifdef CONFIG_PROVE_LOCKING | ||
287 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", | ||
288 | nr_lock_chains, MAX_LOCKDEP_CHAINS); | ||
289 | seq_printf(m, " dependency chain hlocks: %11d [max: %lu]\n", | ||
290 | nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS); | ||
291 | #endif | ||
292 | |||
293 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
294 | seq_printf(m, " in-hardirq chains: %11u\n", | ||
295 | nr_hardirq_chains); | ||
296 | seq_printf(m, " in-softirq chains: %11u\n", | ||
297 | nr_softirq_chains); | ||
298 | #endif | ||
299 | seq_printf(m, " in-process chains: %11u\n", | ||
300 | nr_process_chains); | ||
301 | seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n", | ||
302 | nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES); | ||
303 | seq_printf(m, " combined max dependencies: %11u\n", | ||
304 | (nr_hardirq_chains + 1) * | ||
305 | (nr_softirq_chains + 1) * | ||
306 | (nr_process_chains + 1) | ||
307 | ); | ||
308 | seq_printf(m, " hardirq-safe locks: %11lu\n", | ||
309 | nr_hardirq_safe); | ||
310 | seq_printf(m, " hardirq-unsafe locks: %11lu\n", | ||
311 | nr_hardirq_unsafe); | ||
312 | seq_printf(m, " softirq-safe locks: %11lu\n", | ||
313 | nr_softirq_safe); | ||
314 | seq_printf(m, " softirq-unsafe locks: %11lu\n", | ||
315 | nr_softirq_unsafe); | ||
316 | seq_printf(m, " irq-safe locks: %11lu\n", | ||
317 | nr_irq_safe); | ||
318 | seq_printf(m, " irq-unsafe locks: %11lu\n", | ||
319 | nr_irq_unsafe); | ||
320 | |||
321 | seq_printf(m, " hardirq-read-safe locks: %11lu\n", | ||
322 | nr_hardirq_read_safe); | ||
323 | seq_printf(m, " hardirq-read-unsafe locks: %11lu\n", | ||
324 | nr_hardirq_read_unsafe); | ||
325 | seq_printf(m, " softirq-read-safe locks: %11lu\n", | ||
326 | nr_softirq_read_safe); | ||
327 | seq_printf(m, " softirq-read-unsafe locks: %11lu\n", | ||
328 | nr_softirq_read_unsafe); | ||
329 | seq_printf(m, " irq-read-safe locks: %11lu\n", | ||
330 | nr_irq_read_safe); | ||
331 | seq_printf(m, " irq-read-unsafe locks: %11lu\n", | ||
332 | nr_irq_read_unsafe); | ||
333 | |||
334 | seq_printf(m, " uncategorized locks: %11lu\n", | ||
335 | nr_uncategorized); | ||
336 | seq_printf(m, " unused locks: %11lu\n", | ||
337 | nr_unused); | ||
338 | seq_printf(m, " max locking depth: %11u\n", | ||
339 | max_lockdep_depth); | ||
340 | #ifdef CONFIG_PROVE_LOCKING | ||
341 | seq_printf(m, " max bfs queue depth: %11u\n", | ||
342 | max_bfs_queue_depth); | ||
343 | #endif | ||
344 | lockdep_stats_debug_show(m); | ||
345 | seq_printf(m, " debug_locks: %11u\n", | ||
346 | debug_locks); | ||
347 | |||
348 | return 0; | ||
349 | } | ||
350 | |||
351 | static int lockdep_stats_open(struct inode *inode, struct file *file) | ||
352 | { | ||
353 | return single_open(file, lockdep_stats_show, NULL); | ||
354 | } | ||
355 | |||
356 | static const struct file_operations proc_lockdep_stats_operations = { | ||
357 | .open = lockdep_stats_open, | ||
358 | .read = seq_read, | ||
359 | .llseek = seq_lseek, | ||
360 | .release = single_release, | ||
361 | }; | ||
362 | |||
363 | #ifdef CONFIG_LOCK_STAT | ||
364 | |||
365 | struct lock_stat_data { | ||
366 | struct lock_class *class; | ||
367 | struct lock_class_stats stats; | ||
368 | }; | ||
369 | |||
370 | struct lock_stat_seq { | ||
371 | struct lock_stat_data *iter_end; | ||
372 | struct lock_stat_data stats[MAX_LOCKDEP_KEYS]; | ||
373 | }; | ||
374 | |||
375 | /* | ||
376 | * sort on absolute number of contentions | ||
377 | */ | ||
378 | static int lock_stat_cmp(const void *l, const void *r) | ||
379 | { | ||
380 | const struct lock_stat_data *dl = l, *dr = r; | ||
381 | unsigned long nl, nr; | ||
382 | |||
383 | nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr; | ||
384 | nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr; | ||
385 | |||
386 | return nr - nl; | ||
387 | } | ||
388 | |||
389 | static void seq_line(struct seq_file *m, char c, int offset, int length) | ||
390 | { | ||
391 | int i; | ||
392 | |||
393 | for (i = 0; i < offset; i++) | ||
394 | seq_puts(m, " "); | ||
395 | for (i = 0; i < length; i++) | ||
396 | seq_printf(m, "%c", c); | ||
397 | seq_puts(m, "\n"); | ||
398 | } | ||
399 | |||
400 | static void snprint_time(char *buf, size_t bufsiz, s64 nr) | ||
401 | { | ||
402 | s64 div; | ||
403 | s32 rem; | ||
404 | |||
405 | nr += 5; /* for display rounding */ | ||
406 | div = div_s64_rem(nr, 1000, &rem); | ||
407 | snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10); | ||
408 | } | ||
409 | |||
410 | static void seq_time(struct seq_file *m, s64 time) | ||
411 | { | ||
412 | char num[15]; | ||
413 | |||
414 | snprint_time(num, sizeof(num), time); | ||
415 | seq_printf(m, " %14s", num); | ||
416 | } | ||
417 | |||
418 | static void seq_lock_time(struct seq_file *m, struct lock_time *lt) | ||
419 | { | ||
420 | seq_printf(m, "%14lu", lt->nr); | ||
421 | seq_time(m, lt->min); | ||
422 | seq_time(m, lt->max); | ||
423 | seq_time(m, lt->total); | ||
424 | seq_time(m, lt->nr ? div_s64(lt->total, lt->nr) : 0); | ||
425 | } | ||
426 | |||
427 | static void seq_stats(struct seq_file *m, struct lock_stat_data *data) | ||
428 | { | ||
429 | char name[39]; | ||
430 | struct lock_class *class; | ||
431 | struct lock_class_stats *stats; | ||
432 | int i, namelen; | ||
433 | |||
434 | class = data->class; | ||
435 | stats = &data->stats; | ||
436 | |||
437 | namelen = 38; | ||
438 | if (class->name_version > 1) | ||
439 | namelen -= 2; /* XXX truncates versions > 9 */ | ||
440 | if (class->subclass) | ||
441 | namelen -= 2; | ||
442 | |||
443 | if (!class->name) { | ||
444 | char str[KSYM_NAME_LEN]; | ||
445 | const char *key_name; | ||
446 | |||
447 | key_name = __get_key_name(class->key, str); | ||
448 | snprintf(name, namelen, "%s", key_name); | ||
449 | } else { | ||
450 | snprintf(name, namelen, "%s", class->name); | ||
451 | } | ||
452 | namelen = strlen(name); | ||
453 | if (class->name_version > 1) { | ||
454 | snprintf(name+namelen, 3, "#%d", class->name_version); | ||
455 | namelen += 2; | ||
456 | } | ||
457 | if (class->subclass) { | ||
458 | snprintf(name+namelen, 3, "/%d", class->subclass); | ||
459 | namelen += 2; | ||
460 | } | ||
461 | |||
462 | if (stats->write_holdtime.nr) { | ||
463 | if (stats->read_holdtime.nr) | ||
464 | seq_printf(m, "%38s-W:", name); | ||
465 | else | ||
466 | seq_printf(m, "%40s:", name); | ||
467 | |||
468 | seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]); | ||
469 | seq_lock_time(m, &stats->write_waittime); | ||
470 | seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]); | ||
471 | seq_lock_time(m, &stats->write_holdtime); | ||
472 | seq_puts(m, "\n"); | ||
473 | } | ||
474 | |||
475 | if (stats->read_holdtime.nr) { | ||
476 | seq_printf(m, "%38s-R:", name); | ||
477 | seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]); | ||
478 | seq_lock_time(m, &stats->read_waittime); | ||
479 | seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]); | ||
480 | seq_lock_time(m, &stats->read_holdtime); | ||
481 | seq_puts(m, "\n"); | ||
482 | } | ||
483 | |||
484 | if (stats->read_waittime.nr + stats->write_waittime.nr == 0) | ||
485 | return; | ||
486 | |||
487 | if (stats->read_holdtime.nr) | ||
488 | namelen += 2; | ||
489 | |||
490 | for (i = 0; i < LOCKSTAT_POINTS; i++) { | ||
491 | char ip[32]; | ||
492 | |||
493 | if (class->contention_point[i] == 0) | ||
494 | break; | ||
495 | |||
496 | if (!i) | ||
497 | seq_line(m, '-', 40-namelen, namelen); | ||
498 | |||
499 | snprintf(ip, sizeof(ip), "[<%p>]", | ||
500 | (void *)class->contention_point[i]); | ||
501 | seq_printf(m, "%40s %14lu %29s %pS\n", | ||
502 | name, stats->contention_point[i], | ||
503 | ip, (void *)class->contention_point[i]); | ||
504 | } | ||
505 | for (i = 0; i < LOCKSTAT_POINTS; i++) { | ||
506 | char ip[32]; | ||
507 | |||
508 | if (class->contending_point[i] == 0) | ||
509 | break; | ||
510 | |||
511 | if (!i) | ||
512 | seq_line(m, '-', 40-namelen, namelen); | ||
513 | |||
514 | snprintf(ip, sizeof(ip), "[<%p>]", | ||
515 | (void *)class->contending_point[i]); | ||
516 | seq_printf(m, "%40s %14lu %29s %pS\n", | ||
517 | name, stats->contending_point[i], | ||
518 | ip, (void *)class->contending_point[i]); | ||
519 | } | ||
520 | if (i) { | ||
521 | seq_puts(m, "\n"); | ||
522 | seq_line(m, '.', 0, 40 + 1 + 12 * (14 + 1)); | ||
523 | seq_puts(m, "\n"); | ||
524 | } | ||
525 | } | ||
526 | |||
527 | static void seq_header(struct seq_file *m) | ||
528 | { | ||
529 | seq_puts(m, "lock_stat version 0.4\n"); | ||
530 | |||
531 | if (unlikely(!debug_locks)) | ||
532 | seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n"); | ||
533 | |||
534 | seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); | ||
535 | seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s " | ||
536 | "%14s %14s\n", | ||
537 | "class name", | ||
538 | "con-bounces", | ||
539 | "contentions", | ||
540 | "waittime-min", | ||
541 | "waittime-max", | ||
542 | "waittime-total", | ||
543 | "waittime-avg", | ||
544 | "acq-bounces", | ||
545 | "acquisitions", | ||
546 | "holdtime-min", | ||
547 | "holdtime-max", | ||
548 | "holdtime-total", | ||
549 | "holdtime-avg"); | ||
550 | seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); | ||
551 | seq_printf(m, "\n"); | ||
552 | } | ||
553 | |||
554 | static void *ls_start(struct seq_file *m, loff_t *pos) | ||
555 | { | ||
556 | struct lock_stat_seq *data = m->private; | ||
557 | struct lock_stat_data *iter; | ||
558 | |||
559 | if (*pos == 0) | ||
560 | return SEQ_START_TOKEN; | ||
561 | |||
562 | iter = data->stats + (*pos - 1); | ||
563 | if (iter >= data->iter_end) | ||
564 | iter = NULL; | ||
565 | |||
566 | return iter; | ||
567 | } | ||
568 | |||
569 | static void *ls_next(struct seq_file *m, void *v, loff_t *pos) | ||
570 | { | ||
571 | (*pos)++; | ||
572 | return ls_start(m, pos); | ||
573 | } | ||
574 | |||
575 | static void ls_stop(struct seq_file *m, void *v) | ||
576 | { | ||
577 | } | ||
578 | |||
579 | static int ls_show(struct seq_file *m, void *v) | ||
580 | { | ||
581 | if (v == SEQ_START_TOKEN) | ||
582 | seq_header(m); | ||
583 | else | ||
584 | seq_stats(m, v); | ||
585 | |||
586 | return 0; | ||
587 | } | ||
588 | |||
589 | static const struct seq_operations lockstat_ops = { | ||
590 | .start = ls_start, | ||
591 | .next = ls_next, | ||
592 | .stop = ls_stop, | ||
593 | .show = ls_show, | ||
594 | }; | ||
595 | |||
596 | static int lock_stat_open(struct inode *inode, struct file *file) | ||
597 | { | ||
598 | int res; | ||
599 | struct lock_class *class; | ||
600 | struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq)); | ||
601 | |||
602 | if (!data) | ||
603 | return -ENOMEM; | ||
604 | |||
605 | res = seq_open(file, &lockstat_ops); | ||
606 | if (!res) { | ||
607 | struct lock_stat_data *iter = data->stats; | ||
608 | struct seq_file *m = file->private_data; | ||
609 | |||
610 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | ||
611 | iter->class = class; | ||
612 | iter->stats = lock_stats(class); | ||
613 | iter++; | ||
614 | } | ||
615 | data->iter_end = iter; | ||
616 | |||
617 | sort(data->stats, data->iter_end - data->stats, | ||
618 | sizeof(struct lock_stat_data), | ||
619 | lock_stat_cmp, NULL); | ||
620 | |||
621 | m->private = data; | ||
622 | } else | ||
623 | vfree(data); | ||
624 | |||
625 | return res; | ||
626 | } | ||
627 | |||
628 | static ssize_t lock_stat_write(struct file *file, const char __user *buf, | ||
629 | size_t count, loff_t *ppos) | ||
630 | { | ||
631 | struct lock_class *class; | ||
632 | char c; | ||
633 | |||
634 | if (count) { | ||
635 | if (get_user(c, buf)) | ||
636 | return -EFAULT; | ||
637 | |||
638 | if (c != '0') | ||
639 | return count; | ||
640 | |||
641 | list_for_each_entry(class, &all_lock_classes, lock_entry) | ||
642 | clear_lock_stats(class); | ||
643 | } | ||
644 | return count; | ||
645 | } | ||
646 | |||
647 | static int lock_stat_release(struct inode *inode, struct file *file) | ||
648 | { | ||
649 | struct seq_file *seq = file->private_data; | ||
650 | |||
651 | vfree(seq->private); | ||
652 | return seq_release(inode, file); | ||
653 | } | ||
654 | |||
655 | static const struct file_operations proc_lock_stat_operations = { | ||
656 | .open = lock_stat_open, | ||
657 | .write = lock_stat_write, | ||
658 | .read = seq_read, | ||
659 | .llseek = seq_lseek, | ||
660 | .release = lock_stat_release, | ||
661 | }; | ||
662 | #endif /* CONFIG_LOCK_STAT */ | ||
663 | |||
664 | static int __init lockdep_proc_init(void) | ||
665 | { | ||
666 | proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations); | ||
667 | #ifdef CONFIG_PROVE_LOCKING | ||
668 | proc_create("lockdep_chains", S_IRUSR, NULL, | ||
669 | &proc_lockdep_chains_operations); | ||
670 | #endif | ||
671 | proc_create("lockdep_stats", S_IRUSR, NULL, | ||
672 | &proc_lockdep_stats_operations); | ||
673 | |||
674 | #ifdef CONFIG_LOCK_STAT | ||
675 | proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL, | ||
676 | &proc_lock_stat_operations); | ||
677 | #endif | ||
678 | |||
679 | return 0; | ||
680 | } | ||
681 | |||
682 | __initcall(lockdep_proc_init); | ||
683 | |||
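The proc interface above exposes /proc/lockdep, /proc/lockdep_chains, /proc/lockdep_stats and (with CONFIG_LOCK_STAT) /proc/lock_stat; note that lock_stat_write() only resets the accumulated statistics when the first character written is '0'. A minimal user-space sketch of that workflow follows — the helper names are hypothetical, and it assumes CONFIG_LOCK_STAT=y plus sufficient privileges, since the files are created 0600:

```c
/*
 * lockstat_snapshot.c - hypothetical user-space helper, not part of the kernel.
 * Assumes CONFIG_LOCK_STAT=y and root privileges.
 */
#include <stdio.h>

static void reset_lock_stat(void)
{
	/* lock_stat_write() only acts on a leading '0'; anything else is ignored. */
	FILE *f = fopen("/proc/lock_stat", "w");

	if (!f) {
		perror("open /proc/lock_stat");
		return;
	}
	fputc('0', f);
	fclose(f);
}

static void dump_lockdep_stats(void)
{
	char line[256];
	FILE *f = fopen("/proc/lockdep_stats", "r");

	if (!f) {
		perror("open /proc/lockdep_stats");
		return;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}

int main(void)
{
	reset_lock_stat();	/* start a fresh measurement window */
	/* ... run the workload of interest here ... */
	dump_lockdep_stats();	/* one-shot summary produced by lockdep_stats_show() */
	return 0;
}
```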
diff --git a/kernel/locking/lockdep_states.h b/kernel/locking/lockdep_states.h new file mode 100644 index 000000000000..995b0cc2b84c --- /dev/null +++ b/kernel/locking/lockdep_states.h | |||
@@ -0,0 +1,9 @@ | |||
1 | /* | ||
2 | * Lockdep states, | ||
3 | * | ||
4 | * please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever | ||
5 | * you add one, or come up with a nice dynamic solution. | ||
6 | */ | ||
7 | LOCKDEP_STATE(HARDIRQ) | ||
8 | LOCKDEP_STATE(SOFTIRQ) | ||
9 | LOCKDEP_STATE(RECLAIM_FS) | ||
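lockdep_states.h is an x-macro list: each consumer defines LOCKDEP_STATE(), includes the header, and undefines it again, so adding a state here automatically extends every table derived from it. A hedged sketch of the pattern — the DEMO_* names below are illustrative, not the exact macros lockdep.c uses:

```c
/* Illustrative x-macro expansion; names are examples, not the lockdep internals. */
enum demo_lock_state {
#define LOCKDEP_STATE(__STATE)	DEMO_STATE_##__STATE,
#include "lockdep_states.h"	/* expands to DEMO_STATE_HARDIRQ, DEMO_STATE_SOFTIRQ, ... */
#undef LOCKDEP_STATE
	DEMO_STATE_MAX,		/* tracks the list automatically when a state is added */
};

static const char * const demo_state_names[] = {
#define LOCKDEP_STATE(__STATE)	#__STATE,	/* "HARDIRQ", "SOFTIRQ", ... */
#include "lockdep_states.h"
#undef LOCKDEP_STATE
};
```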
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c new file mode 100644 index 000000000000..7e3443fe1f48 --- /dev/null +++ b/kernel/locking/mutex-debug.c | |||
@@ -0,0 +1,110 @@ | |||
1 | /* | ||
2 | * kernel/locking/mutex-debug.c | ||
3 | * | ||
4 | * Debugging code for mutexes | ||
5 | * | ||
6 | * Started by Ingo Molnar: | ||
7 | * | ||
8 | * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
9 | * | ||
10 | * lock debugging, locking tree, deadlock detection started by: | ||
11 | * | ||
12 | * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey | ||
13 | * Released under the General Public License (GPL). | ||
14 | */ | ||
15 | #include <linux/mutex.h> | ||
16 | #include <linux/delay.h> | ||
17 | #include <linux/export.h> | ||
18 | #include <linux/poison.h> | ||
19 | #include <linux/sched.h> | ||
20 | #include <linux/spinlock.h> | ||
21 | #include <linux/kallsyms.h> | ||
22 | #include <linux/interrupt.h> | ||
23 | #include <linux/debug_locks.h> | ||
24 | |||
25 | #include "mutex-debug.h" | ||
26 | |||
27 | /* | ||
28 | * Must be called with lock->wait_lock held. | ||
29 | */ | ||
30 | void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) | ||
31 | { | ||
32 | memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); | ||
33 | waiter->magic = waiter; | ||
34 | INIT_LIST_HEAD(&waiter->list); | ||
35 | } | ||
36 | |||
37 | void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) | ||
38 | { | ||
39 | SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); | ||
40 | DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); | ||
41 | DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); | ||
42 | DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); | ||
43 | } | ||
44 | |||
45 | void debug_mutex_free_waiter(struct mutex_waiter *waiter) | ||
46 | { | ||
47 | DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list)); | ||
48 | memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); | ||
49 | } | ||
50 | |||
51 | void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, | ||
52 | struct thread_info *ti) | ||
53 | { | ||
54 | SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); | ||
55 | |||
56 | /* Mark the current thread as blocked on the lock: */ | ||
57 | ti->task->blocked_on = waiter; | ||
58 | } | ||
59 | |||
60 | void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, | ||
61 | struct thread_info *ti) | ||
62 | { | ||
63 | DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); | ||
64 | DEBUG_LOCKS_WARN_ON(waiter->task != ti->task); | ||
65 | DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter); | ||
66 | ti->task->blocked_on = NULL; | ||
67 | |||
68 | list_del_init(&waiter->list); | ||
69 | waiter->task = NULL; | ||
70 | } | ||
71 | |||
72 | void debug_mutex_unlock(struct mutex *lock) | ||
73 | { | ||
74 | if (unlikely(!debug_locks)) | ||
75 | return; | ||
76 | |||
77 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); | ||
78 | DEBUG_LOCKS_WARN_ON(lock->owner != current); | ||
79 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); | ||
80 | mutex_clear_owner(lock); | ||
81 | } | ||
82 | |||
83 | void debug_mutex_init(struct mutex *lock, const char *name, | ||
84 | struct lock_class_key *key) | ||
85 | { | ||
86 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
87 | /* | ||
88 | * Make sure we are not reinitializing a held lock: | ||
89 | */ | ||
90 | debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | ||
91 | lockdep_init_map(&lock->dep_map, name, key, 0); | ||
92 | #endif | ||
93 | lock->magic = lock; | ||
94 | } | ||
95 | |||
96 | /** | ||
97 | * mutex_destroy - mark a mutex unusable | ||
98 | * @lock: the mutex to be destroyed | ||
99 | * | ||
100 | * This function marks the mutex uninitialized, and any subsequent | ||
101 | * use of the mutex is forbidden. The mutex must not be locked when | ||
102 | * this function is called. | ||
103 | */ | ||
104 | void mutex_destroy(struct mutex *lock) | ||
105 | { | ||
106 | DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)); | ||
107 | lock->magic = NULL; | ||
108 | } | ||
109 | |||
110 | EXPORT_SYMBOL_GPL(mutex_destroy); | ||
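mutex_destroy() above poisons lock->magic so that any later use trips the DEBUG_LOCKS_WARN_ON() checks when CONFIG_DEBUG_MUTEXES is enabled. A short lifecycle sketch — struct my_dev and its helpers are hypothetical:

```c
/* Illustrative lifecycle sketch; struct my_dev and its helpers are hypothetical. */
#include <linux/mutex.h>
#include <linux/slab.h>

struct my_dev {
	struct mutex io_lock;	/* serializes access to hw_state */
	int hw_state;
};

static struct my_dev *my_dev_create(void)
{
	struct my_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;
	mutex_init(&dev->io_lock);	/* must happen before the first mutex_lock() */
	return dev;
}

static void my_dev_destroy(struct my_dev *dev)
{
	mutex_destroy(&dev->io_lock);	/* must not be held; otherwise the WARN above fires */
	kfree(dev);			/* free the memory only after mutex_destroy() */
}
```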
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h new file mode 100644 index 000000000000..0799fd3e4cfa --- /dev/null +++ b/kernel/locking/mutex-debug.h | |||
@@ -0,0 +1,55 @@ | |||
1 | /* | ||
2 | * Mutexes: blocking mutual exclusion locks | ||
3 | * | ||
4 | * started by Ingo Molnar: | ||
5 | * | ||
6 | * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
7 | * | ||
8 | * This file contains mutex debugging related internal declarations, | ||
9 | * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case. | ||
10 | * More details are in kernel/locking/mutex-debug.c. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * This must be called with lock->wait_lock held. | ||
15 | */ | ||
16 | extern void debug_mutex_lock_common(struct mutex *lock, | ||
17 | struct mutex_waiter *waiter); | ||
18 | extern void debug_mutex_wake_waiter(struct mutex *lock, | ||
19 | struct mutex_waiter *waiter); | ||
20 | extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); | ||
21 | extern void debug_mutex_add_waiter(struct mutex *lock, | ||
22 | struct mutex_waiter *waiter, | ||
23 | struct thread_info *ti); | ||
24 | extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, | ||
25 | struct thread_info *ti); | ||
26 | extern void debug_mutex_unlock(struct mutex *lock); | ||
27 | extern void debug_mutex_init(struct mutex *lock, const char *name, | ||
28 | struct lock_class_key *key); | ||
29 | |||
30 | static inline void mutex_set_owner(struct mutex *lock) | ||
31 | { | ||
32 | lock->owner = current; | ||
33 | } | ||
34 | |||
35 | static inline void mutex_clear_owner(struct mutex *lock) | ||
36 | { | ||
37 | lock->owner = NULL; | ||
38 | } | ||
39 | |||
40 | #define spin_lock_mutex(lock, flags) \ | ||
41 | do { \ | ||
42 | struct mutex *l = container_of(lock, struct mutex, wait_lock); \ | ||
43 | \ | ||
44 | DEBUG_LOCKS_WARN_ON(in_interrupt()); \ | ||
45 | local_irq_save(flags); \ | ||
46 | arch_spin_lock(&(lock)->rlock.raw_lock);\ | ||
47 | DEBUG_LOCKS_WARN_ON(l->magic != l); \ | ||
48 | } while (0) | ||
49 | |||
50 | #define spin_unlock_mutex(lock, flags) \ | ||
51 | do { \ | ||
52 | arch_spin_unlock(&(lock)->rlock.raw_lock); \ | ||
53 | local_irq_restore(flags); \ | ||
54 | preempt_check_resched(); \ | ||
55 | } while (0) | ||
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c new file mode 100644 index 000000000000..4dd6e4c219de --- /dev/null +++ b/kernel/locking/mutex.c | |||
@@ -0,0 +1,960 @@ | |||
1 | /* | ||
2 | * kernel/locking/mutex.c | ||
3 | * | ||
4 | * Mutexes: blocking mutual exclusion locks | ||
5 | * | ||
6 | * Started by Ingo Molnar: | ||
7 | * | ||
8 | * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
9 | * | ||
10 | * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and | ||
11 | * David Howells for suggestions and improvements. | ||
12 | * | ||
13 | * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline | ||
14 | * from the -rt tree, where it was originally implemented for rtmutexes | ||
15 | * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale | ||
16 | * and Sven Dietrich. | ||
17 | * | ||
18 | * Also see Documentation/mutex-design.txt. | ||
19 | */ | ||
20 | #include <linux/mutex.h> | ||
21 | #include <linux/ww_mutex.h> | ||
22 | #include <linux/sched.h> | ||
23 | #include <linux/sched/rt.h> | ||
24 | #include <linux/export.h> | ||
25 | #include <linux/spinlock.h> | ||
26 | #include <linux/interrupt.h> | ||
27 | #include <linux/debug_locks.h> | ||
28 | |||
29 | /* | ||
30 | * In the DEBUG case we are using the "NULL fastpath" for mutexes, | ||
31 | * which forces all calls into the slowpath: | ||
32 | */ | ||
33 | #ifdef CONFIG_DEBUG_MUTEXES | ||
34 | # include "mutex-debug.h" | ||
35 | # include <asm-generic/mutex-null.h> | ||
36 | #else | ||
37 | # include "mutex.h" | ||
38 | # include <asm/mutex.h> | ||
39 | #endif | ||
40 | |||
41 | /* | ||
42 | * A negative mutex count indicates that waiters are sleeping waiting for the | ||
43 | * mutex. | ||
44 | */ | ||
45 | #define MUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) >= 0) | ||
46 | |||
47 | void | ||
48 | __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) | ||
49 | { | ||
50 | atomic_set(&lock->count, 1); | ||
51 | spin_lock_init(&lock->wait_lock); | ||
52 | INIT_LIST_HEAD(&lock->wait_list); | ||
53 | mutex_clear_owner(lock); | ||
54 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | ||
55 | lock->spin_mlock = NULL; | ||
56 | #endif | ||
57 | |||
58 | debug_mutex_init(lock, name, key); | ||
59 | } | ||
60 | |||
61 | EXPORT_SYMBOL(__mutex_init); | ||
62 | |||
63 | #ifndef CONFIG_DEBUG_LOCK_ALLOC | ||
64 | /* | ||
65 | * We split the mutex lock/unlock logic into separate fastpath and | ||
66 | * slowpath functions, to reduce the register pressure on the fastpath. | ||
67 | * We also put the fastpath first in the kernel image, to make sure the | ||
68 | * branch is predicted by the CPU as default-untaken. | ||
69 | */ | ||
70 | static __used noinline void __sched | ||
71 | __mutex_lock_slowpath(atomic_t *lock_count); | ||
72 | |||
73 | /** | ||
74 | * mutex_lock - acquire the mutex | ||
75 | * @lock: the mutex to be acquired | ||
76 | * | ||
77 | * Lock the mutex exclusively for this task. If the mutex is not | ||
78 | * available right now, it will sleep until it can get it. | ||
79 | * | ||
80 | * The mutex must later on be released by the same task that | ||
81 | * acquired it. Recursive locking is not allowed. The task | ||
82 | * may not exit without first unlocking the mutex. Also, kernel | ||
83 | * memory where the mutex resides must not be freed with | ||
84 | * the mutex still locked. The mutex must first be initialized | ||
85 | * (or statically defined) before it can be locked. memset()-ing | ||
86 | * the mutex to 0 is not allowed. | ||
87 | * | ||
88 | * ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging | ||
89 | * checks that will enforce the restrictions and will also do | ||
90 | * deadlock debugging. ) | ||
91 | * | ||
92 | * This function is similar to (but not equivalent to) down(). | ||
93 | */ | ||
94 | void __sched mutex_lock(struct mutex *lock) | ||
95 | { | ||
96 | might_sleep(); | ||
97 | /* | ||
98 | * The locking fastpath is the 1->0 transition from | ||
99 | * 'unlocked' into 'locked' state. | ||
100 | */ | ||
101 | __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); | ||
102 | mutex_set_owner(lock); | ||
103 | } | ||
104 | |||
105 | EXPORT_SYMBOL(mutex_lock); | ||
106 | #endif | ||
107 | |||
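The kernel-doc for mutex_lock() spells out the usage rules (sleepable context only, no recursion, released by the acquiring task). A minimal sketch of the canonical pattern — counter_lock and shared_counter are hypothetical names:

```c
/* Minimal usage sketch; counter_lock and shared_counter are hypothetical. */
#include <linux/mutex.h>

static DEFINE_MUTEX(counter_lock);	/* statically initialized mutex */
static unsigned long shared_counter;

static void counter_inc(void)
{
	mutex_lock(&counter_lock);	/* may sleep; never call from interrupt context */
	shared_counter++;
	mutex_unlock(&counter_lock);	/* released by the same task that locked it */
}
```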
108 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | ||
109 | /* | ||
110 | * In order to avoid a stampede of mutex spinners from acquiring the mutex | ||
111 | * more or less simultaneously, the spinners need to acquire an MCS lock | ||
112 | * first before spinning on the owner field. | ||
113 | * | ||
114 | * We don't inline mspin_lock() so that perf can correctly account for the | ||
115 | * time spent in this lock function. | ||
116 | */ | ||
117 | struct mspin_node { | ||
118 | struct mspin_node *next; | ||
119 | int locked; /* 1 if lock acquired */ | ||
120 | }; | ||
121 | #define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) | ||
122 | |||
123 | static noinline | ||
124 | void mspin_lock(struct mspin_node **lock, struct mspin_node *node) | ||
125 | { | ||
126 | struct mspin_node *prev; | ||
127 | |||
128 | /* Init node */ | ||
129 | node->locked = 0; | ||
130 | node->next = NULL; | ||
131 | |||
132 | prev = xchg(lock, node); | ||
133 | if (likely(prev == NULL)) { | ||
134 | /* Lock acquired */ | ||
135 | node->locked = 1; | ||
136 | return; | ||
137 | } | ||
138 | ACCESS_ONCE(prev->next) = node; | ||
139 | smp_wmb(); | ||
140 | /* Wait until the lock holder passes the lock down */ | ||
141 | while (!ACCESS_ONCE(node->locked)) | ||
142 | arch_mutex_cpu_relax(); | ||
143 | } | ||
144 | |||
145 | static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) | ||
146 | { | ||
147 | struct mspin_node *next = ACCESS_ONCE(node->next); | ||
148 | |||
149 | if (likely(!next)) { | ||
150 | /* | ||
151 | * Release the lock by setting it to NULL | ||
152 | */ | ||
153 | if (cmpxchg(lock, node, NULL) == node) | ||
154 | return; | ||
155 | /* Wait until the next pointer is set */ | ||
156 | while (!(next = ACCESS_ONCE(node->next))) | ||
157 | arch_mutex_cpu_relax(); | ||
158 | } | ||
159 | ACCESS_ONCE(next->locked) = 1; | ||
160 | smp_wmb(); | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * Mutex spinning code migrated from kernel/sched/core.c | ||
165 | */ | ||
166 | |||
167 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) | ||
168 | { | ||
169 | if (lock->owner != owner) | ||
170 | return false; | ||
171 | |||
172 | /* | ||
173 | * Ensure we emit the owner->on_cpu dereference _after_ checking that | ||
174 | * lock->owner still matches owner. If that check fails, owner might | ||
175 | * point to free()d memory; if it still matches, the rcu_read_lock() | ||
176 | * ensures the memory stays valid. | ||
177 | */ | ||
178 | barrier(); | ||
179 | |||
180 | return owner->on_cpu; | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * Look out! "owner" is an entirely speculative pointer | ||
185 | * access and not reliable. | ||
186 | */ | ||
187 | static noinline | ||
188 | int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) | ||
189 | { | ||
190 | rcu_read_lock(); | ||
191 | while (owner_running(lock, owner)) { | ||
192 | if (need_resched()) | ||
193 | break; | ||
194 | |||
195 | arch_mutex_cpu_relax(); | ||
196 | } | ||
197 | rcu_read_unlock(); | ||
198 | |||
199 | /* | ||
200 | * We break out the loop above on need_resched() and when the | ||
201 | * owner changed, which is a sign for heavy contention. Return | ||
202 | * success only when lock->owner is NULL. | ||
203 | */ | ||
204 | return lock->owner == NULL; | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * Initial check for entering the mutex spinning loop | ||
209 | */ | ||
210 | static inline int mutex_can_spin_on_owner(struct mutex *lock) | ||
211 | { | ||
212 | struct task_struct *owner; | ||
213 | int retval = 1; | ||
214 | |||
215 | rcu_read_lock(); | ||
216 | owner = ACCESS_ONCE(lock->owner); | ||
217 | if (owner) | ||
218 | retval = owner->on_cpu; | ||
219 | rcu_read_unlock(); | ||
220 | /* | ||
221 | * if lock->owner is not set, the mutex owner may have just acquired | ||
222 | * it and not set the owner yet or the mutex has been released. | ||
223 | */ | ||
224 | return retval; | ||
225 | } | ||
226 | #endif | ||
227 | |||
228 | static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); | ||
229 | |||
230 | /** | ||
231 | * mutex_unlock - release the mutex | ||
232 | * @lock: the mutex to be released | ||
233 | * | ||
234 | * Unlock a mutex that has been locked by this task previously. | ||
235 | * | ||
236 | * This function must not be used in interrupt context. Unlocking | ||
237 | * of a not locked mutex is not allowed. | ||
238 | * | ||
239 | * This function is similar to (but not equivalent to) up(). | ||
240 | */ | ||
241 | void __sched mutex_unlock(struct mutex *lock) | ||
242 | { | ||
243 | /* | ||
244 | * The unlocking fastpath is the 0->1 transition from 'locked' | ||
245 | * into 'unlocked' state: | ||
246 | */ | ||
247 | #ifndef CONFIG_DEBUG_MUTEXES | ||
248 | /* | ||
249 | * When debugging is enabled we must not clear the owner prematurely: | ||
250 | * the slow path will always be taken, and it clears the owner field | ||
251 | * after verifying that it was indeed current. | ||
252 | */ | ||
253 | mutex_clear_owner(lock); | ||
254 | #endif | ||
255 | __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); | ||
256 | } | ||
257 | |||
258 | EXPORT_SYMBOL(mutex_unlock); | ||
259 | |||
260 | /** | ||
261 | * ww_mutex_unlock - release the w/w mutex | ||
262 | * @lock: the mutex to be released | ||
263 | * | ||
264 | * Unlock a mutex that has been locked by this task previously with any of the | ||
265 | * ww_mutex_lock* functions (with or without an acquire context). It is | ||
266 | * forbidden to release the locks after releasing the acquire context. | ||
267 | * | ||
268 | * This function must not be used in interrupt context. Unlocking | ||
269 | * of an unlocked mutex is not allowed. | ||
270 | */ | ||
271 | void __sched ww_mutex_unlock(struct ww_mutex *lock) | ||
272 | { | ||
273 | /* | ||
274 | * The unlocking fastpath is the 0->1 transition from 'locked' | ||
275 | * into 'unlocked' state: | ||
276 | */ | ||
277 | if (lock->ctx) { | ||
278 | #ifdef CONFIG_DEBUG_MUTEXES | ||
279 | DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); | ||
280 | #endif | ||
281 | if (lock->ctx->acquired > 0) | ||
282 | lock->ctx->acquired--; | ||
283 | lock->ctx = NULL; | ||
284 | } | ||
285 | |||
286 | #ifndef CONFIG_DEBUG_MUTEXES | ||
287 | /* | ||
288 | * When debugging is enabled we must not clear the owner prematurely: | ||
289 | * the slow path will always be taken, and it clears the owner field | ||
290 | * after verifying that it was indeed current. | ||
291 | */ | ||
292 | mutex_clear_owner(&lock->base); | ||
293 | #endif | ||
294 | __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath); | ||
295 | } | ||
296 | EXPORT_SYMBOL(ww_mutex_unlock); | ||
297 | |||
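ww_mutex_unlock() is one half of the wound/wait protocol whose stamp comparison is implemented just below: when ww_mutex_lock() returns -EDEADLK, the caller is expected to drop everything it holds, sleep on the contended lock with ww_mutex_lock_slow(), and retry. A hedged sketch of that retry loop — struct obj and demo_ww_class are illustrative; the authoritative pattern is described in Documentation/ww-mutex-design.txt:

```c
/*
 * Hedged sketch of the wound/wait retry protocol; struct obj and
 * demo_ww_class are hypothetical. On success both locks are held, and the
 * caller later releases them with ww_mutex_unlock() plus ww_acquire_fini().
 */
#include <linux/kernel.h>
#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(demo_ww_class);

struct obj {
	struct ww_mutex lock;
	/* ... payload ... */
};

static int demo_lock_pair(struct obj *a, struct obj *b,
			  struct ww_acquire_ctx *ctx)
{
	int ret;

	ww_acquire_init(ctx, &demo_ww_class);

	ret = ww_mutex_lock(&a->lock, ctx);
	if (ret)
		goto out_fini;

	for (;;) {
		ret = ww_mutex_lock(&b->lock, ctx);
		if (ret != -EDEADLK)
			break;
		/*
		 * An older context owns b: drop a, sleep on b with the
		 * _slow variant (guaranteed to succeed), then retry the
		 * other lock with the roles swapped.
		 */
		ww_mutex_unlock(&a->lock);
		ww_mutex_lock_slow(&b->lock, ctx);
		swap(a, b);
	}
	if (ret) {
		ww_mutex_unlock(&a->lock);
		goto out_fini;
	}

	ww_acquire_done(ctx);	/* no further locks in this context */
	return 0;

out_fini:
	ww_acquire_fini(ctx);
	return ret;
}
```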
298 | static inline int __sched | ||
299 | __mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) | ||
300 | { | ||
301 | struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); | ||
302 | struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); | ||
303 | |||
304 | if (!hold_ctx) | ||
305 | return 0; | ||
306 | |||
307 | if (unlikely(ctx == hold_ctx)) | ||
308 | return -EALREADY; | ||
309 | |||
310 | if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && | ||
311 | (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { | ||
312 | #ifdef CONFIG_DEBUG_MUTEXES | ||
313 | DEBUG_LOCKS_WARN_ON(ctx->contending_lock); | ||
314 | ctx->contending_lock = ww; | ||
315 | #endif | ||
316 | return -EDEADLK; | ||
317 | } | ||
318 | |||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, | ||
323 | struct ww_acquire_ctx *ww_ctx) | ||
324 | { | ||
325 | #ifdef CONFIG_DEBUG_MUTEXES | ||
326 | /* | ||
327 | * If this WARN_ON triggers, you used ww_mutex_lock to acquire, | ||
328 | * but released with a normal mutex_unlock in this call. | ||
329 | * | ||
330 | * This should never happen, always use ww_mutex_unlock. | ||
331 | */ | ||
332 | DEBUG_LOCKS_WARN_ON(ww->ctx); | ||
333 | |||
334 | /* | ||
336 | * Not quite done after calling ww_acquire_done()? | ||
336 | */ | ||
337 | DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); | ||
338 | |||
339 | if (ww_ctx->contending_lock) { | ||
340 | /* | ||
341 | * After -EDEADLK you tried to | ||
342 | * acquire a different ww_mutex? Bad! | ||
343 | */ | ||
344 | DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); | ||
345 | |||
346 | /* | ||
347 | * You called ww_mutex_lock after receiving -EDEADLK, | ||
348 | * but 'forgot' to unlock everything else first? | ||
349 | */ | ||
350 | DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); | ||
351 | ww_ctx->contending_lock = NULL; | ||
352 | } | ||
353 | |||
354 | /* | ||
355 | * Naughty, using a different class will lead to undefined behavior! | ||
356 | */ | ||
357 | DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); | ||
358 | #endif | ||
359 | ww_ctx->acquired++; | ||
360 | } | ||
361 | |||
362 | /* | ||
363 | * after acquiring lock with fastpath or when we lost out in contested | ||
364 | * slowpath, set ctx and wake up any waiters so they can recheck. | ||
365 | * | ||
366 | * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, | ||
367 | * as the fastpath and opportunistic spinning are disabled in that case. | ||
368 | */ | ||
369 | static __always_inline void | ||
370 | ww_mutex_set_context_fastpath(struct ww_mutex *lock, | ||
371 | struct ww_acquire_ctx *ctx) | ||
372 | { | ||
373 | unsigned long flags; | ||
374 | struct mutex_waiter *cur; | ||
375 | |||
376 | ww_mutex_lock_acquired(lock, ctx); | ||
377 | |||
378 | lock->ctx = ctx; | ||
379 | |||
380 | /* | ||
381 | * The lock->ctx update should be visible on all cores before | ||
382 | * the atomic read is done, otherwise contended waiters might be | ||
383 | * missed. The contended waiters will either see ww_ctx == NULL | ||
384 | * and keep spinning, or they will acquire wait_lock, add themselves | ||
385 | * to the waiter list and sleep. | ||
386 | */ | ||
387 | smp_mb(); /* ^^^ */ | ||
388 | |||
389 | /* | ||
390 | * Check if lock is contended, if not there is nobody to wake up | ||
391 | */ | ||
392 | if (likely(atomic_read(&lock->base.count) == 0)) | ||
393 | return; | ||
394 | |||
395 | /* | ||
396 | * Uh oh, we raced in fastpath, wake up everyone in this case, | ||
397 | * so they can see the new lock->ctx. | ||
398 | */ | ||
399 | spin_lock_mutex(&lock->base.wait_lock, flags); | ||
400 | list_for_each_entry(cur, &lock->base.wait_list, list) { | ||
401 | debug_mutex_wake_waiter(&lock->base, cur); | ||
402 | wake_up_process(cur->task); | ||
403 | } | ||
404 | spin_unlock_mutex(&lock->base.wait_lock, flags); | ||
405 | } | ||
406 | |||
407 | /* | ||
408 | * Lock a mutex (possibly interruptible), slowpath: | ||
409 | */ | ||
410 | static __always_inline int __sched | ||
411 | __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | ||
412 | struct lockdep_map *nest_lock, unsigned long ip, | ||
413 | struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) | ||
414 | { | ||
415 | struct task_struct *task = current; | ||
416 | struct mutex_waiter waiter; | ||
417 | unsigned long flags; | ||
418 | int ret; | ||
419 | |||
420 | preempt_disable(); | ||
421 | mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); | ||
422 | |||
423 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | ||
424 | /* | ||
425 | * Optimistic spinning. | ||
426 | * | ||
427 | * We try to spin for acquisition when we find that there are no | ||
428 | * pending waiters and the lock owner is currently running on a | ||
429 | * (different) CPU. | ||
430 | * | ||
431 | * The rationale is that if the lock owner is running, it is likely to | ||
432 | * release the lock soon. | ||
433 | * | ||
434 | * Since this needs the lock owner, and this mutex implementation | ||
435 | * doesn't track the owner atomically in the lock field, we need to | ||
436 | * track it non-atomically. | ||
437 | * | ||
438 | * We can't do this for DEBUG_MUTEXES because that relies on wait_lock | ||
439 | * to serialize everything. | ||
440 | * | ||
441 | * The mutex spinners are queued up using MCS lock so that only one | ||
442 | * spinner can compete for the mutex. However, if mutex spinning isn't | ||
443 | * going to happen, there is no point in going through the lock/unlock | ||
444 | * overhead. | ||
445 | */ | ||
446 | if (!mutex_can_spin_on_owner(lock)) | ||
447 | goto slowpath; | ||
448 | |||
449 | for (;;) { | ||
450 | struct task_struct *owner; | ||
451 | struct mspin_node node; | ||
452 | |||
453 | if (use_ww_ctx && ww_ctx->acquired > 0) { | ||
454 | struct ww_mutex *ww; | ||
455 | |||
456 | ww = container_of(lock, struct ww_mutex, base); | ||
457 | /* | ||
458 | * If ww->ctx is set the contents are undefined; only | ||
459 | * by acquiring wait_lock is there a guarantee that | ||
460 | * they are valid when read. | ||
461 | * | ||
462 | * As such, when deadlock detection needs to be | ||
463 | * performed the optimistic spinning cannot be done. | ||
464 | */ | ||
465 | if (ACCESS_ONCE(ww->ctx)) | ||
466 | goto slowpath; | ||
467 | } | ||
468 | |||
469 | /* | ||
470 | * If there's an owner, wait for it to either | ||
471 | * release the lock or go to sleep. | ||
472 | */ | ||
473 | mspin_lock(MLOCK(lock), &node); | ||
474 | owner = ACCESS_ONCE(lock->owner); | ||
475 | if (owner && !mutex_spin_on_owner(lock, owner)) { | ||
476 | mspin_unlock(MLOCK(lock), &node); | ||
477 | goto slowpath; | ||
478 | } | ||
479 | |||
480 | if ((atomic_read(&lock->count) == 1) && | ||
481 | (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { | ||
482 | lock_acquired(&lock->dep_map, ip); | ||
483 | if (use_ww_ctx) { | ||
484 | struct ww_mutex *ww; | ||
485 | ww = container_of(lock, struct ww_mutex, base); | ||
486 | |||
487 | ww_mutex_set_context_fastpath(ww, ww_ctx); | ||
488 | } | ||
489 | |||
490 | mutex_set_owner(lock); | ||
491 | mspin_unlock(MLOCK(lock), &node); | ||
492 | preempt_enable(); | ||
493 | return 0; | ||
494 | } | ||
495 | mspin_unlock(MLOCK(lock), &node); | ||
496 | |||
497 | /* | ||
498 | * When there's no owner, we might have preempted between the | ||
499 | * owner acquiring the lock and setting the owner field. If | ||
500 | * we're an RT task that will live-lock because we won't let | ||
501 | * the owner complete. | ||
502 | */ | ||
503 | if (!owner && (need_resched() || rt_task(task))) | ||
504 | goto slowpath; | ||
505 | |||
506 | /* | ||
507 | * The cpu_relax() call is a compiler barrier which forces | ||
508 | * everything in this loop to be re-loaded. We don't need | ||
509 | * memory barriers as we'll eventually observe the right | ||
510 | * values at the cost of a few extra spins. | ||
511 | */ | ||
512 | arch_mutex_cpu_relax(); | ||
513 | } | ||
514 | slowpath: | ||
515 | #endif | ||
516 | spin_lock_mutex(&lock->wait_lock, flags); | ||
517 | |||
518 | /* once more, can we acquire the lock? */ | ||
519 | if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, 0) == 1)) | ||
520 | goto skip_wait; | ||
521 | |||
522 | debug_mutex_lock_common(lock, &waiter); | ||
523 | debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); | ||
524 | |||
525 | /* add waiting tasks to the end of the waitqueue (FIFO): */ | ||
526 | list_add_tail(&waiter.list, &lock->wait_list); | ||
527 | waiter.task = task; | ||
528 | |||
529 | lock_contended(&lock->dep_map, ip); | ||
530 | |||
531 | for (;;) { | ||
532 | /* | ||
533 | * Let's try to take the lock again - this is needed even if | ||
534 | * we get here for the first time (shortly after failing to | ||
535 | * acquire the lock), to make sure that we get a wakeup once | ||
536 | * it's unlocked. Later on, if we sleep, this is the | ||
537 | * operation that gives us the lock. We xchg it to -1, so | ||
538 | * that when we release the lock, we properly wake up the | ||
539 | * other waiters: | ||
540 | */ | ||
541 | if (MUTEX_SHOW_NO_WAITER(lock) && | ||
542 | (atomic_xchg(&lock->count, -1) == 1)) | ||
543 | break; | ||
544 | |||
545 | /* | ||
546 | * got a signal? (This code gets eliminated in the | ||
547 | * TASK_UNINTERRUPTIBLE case.) | ||
548 | */ | ||
549 | if (unlikely(signal_pending_state(state, task))) { | ||
550 | ret = -EINTR; | ||
551 | goto err; | ||
552 | } | ||
553 | |||
554 | if (use_ww_ctx && ww_ctx->acquired > 0) { | ||
555 | ret = __mutex_lock_check_stamp(lock, ww_ctx); | ||
556 | if (ret) | ||
557 | goto err; | ||
558 | } | ||
559 | |||
560 | __set_task_state(task, state); | ||
561 | |||
562 | /* didn't get the lock, go to sleep: */ | ||
563 | spin_unlock_mutex(&lock->wait_lock, flags); | ||
564 | schedule_preempt_disabled(); | ||
565 | spin_lock_mutex(&lock->wait_lock, flags); | ||
566 | } | ||
567 | mutex_remove_waiter(lock, &waiter, current_thread_info()); | ||
568 | /* set it to 0 if there are no waiters left: */ | ||
569 | if (likely(list_empty(&lock->wait_list))) | ||
570 | atomic_set(&lock->count, 0); | ||
571 | debug_mutex_free_waiter(&waiter); | ||
572 | |||
573 | skip_wait: | ||
574 | /* got the lock - cleanup and rejoice! */ | ||
575 | lock_acquired(&lock->dep_map, ip); | ||
576 | mutex_set_owner(lock); | ||
577 | |||
578 | if (use_ww_ctx) { | ||
579 | struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); | ||
580 | struct mutex_waiter *cur; | ||
581 | |||
582 | /* | ||
583 | * This branch gets optimized out for the common case, | ||
584 | * and is only important for ww_mutex_lock. | ||
585 | */ | ||
586 | ww_mutex_lock_acquired(ww, ww_ctx); | ||
587 | ww->ctx = ww_ctx; | ||
588 | |||
589 | /* | ||
590 | * Give any possible sleeping processes the chance to wake up, | ||
591 | * so they can recheck if they have to back off. | ||
592 | */ | ||
593 | list_for_each_entry(cur, &lock->wait_list, list) { | ||
594 | debug_mutex_wake_waiter(lock, cur); | ||
595 | wake_up_process(cur->task); | ||
596 | } | ||
597 | } | ||
598 | |||
599 | spin_unlock_mutex(&lock->wait_lock, flags); | ||
600 | preempt_enable(); | ||
601 | return 0; | ||
602 | |||
603 | err: | ||
604 | mutex_remove_waiter(lock, &waiter, task_thread_info(task)); | ||
605 | spin_unlock_mutex(&lock->wait_lock, flags); | ||
606 | debug_mutex_free_waiter(&waiter); | ||
607 | mutex_release(&lock->dep_map, 1, ip); | ||
608 | preempt_enable(); | ||
609 | return ret; | ||
610 | } | ||
611 | |||
612 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
613 | void __sched | ||
614 | mutex_lock_nested(struct mutex *lock, unsigned int subclass) | ||
615 | { | ||
616 | might_sleep(); | ||
617 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, | ||
618 | subclass, NULL, _RET_IP_, NULL, 0); | ||
619 | } | ||
620 | |||
621 | EXPORT_SYMBOL_GPL(mutex_lock_nested); | ||
622 | |||
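mutex_lock_nested() only changes the lockdep annotation, not the locking behaviour: the subclass lets lockdep distinguish two locks of the same class that are always taken in a fixed order. A hypothetical sketch using SINGLE_DEPTH_NESTING — struct node is illustrative:

```c
/* Hypothetical nested-class annotation; struct node is illustrative. */
#include <linux/lockdep.h>
#include <linux/mutex.h>

struct node {
	struct mutex lock;
	struct node *parent;
};

static void node_update(struct node *n)
{
	mutex_lock(&n->parent->lock);
	/*
	 * The child lock belongs to the same lock class as the parent lock;
	 * annotate it one nesting level deeper so lockdep does not report a
	 * false AA deadlock for this fixed parent -> child order.
	 */
	mutex_lock_nested(&n->lock, SINGLE_DEPTH_NESTING);

	/* ... update both nodes ... */

	mutex_unlock(&n->lock);
	mutex_unlock(&n->parent->lock);
}
```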
623 | void __sched | ||
624 | _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) | ||
625 | { | ||
626 | might_sleep(); | ||
627 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, | ||
628 | 0, nest, _RET_IP_, NULL, 0); | ||
629 | } | ||
630 | |||
631 | EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); | ||
632 | |||
633 | int __sched | ||
634 | mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) | ||
635 | { | ||
636 | might_sleep(); | ||
637 | return __mutex_lock_common(lock, TASK_KILLABLE, | ||
638 | subclass, NULL, _RET_IP_, NULL, 0); | ||
639 | } | ||
640 | EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); | ||
641 | |||
642 | int __sched | ||
643 | mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) | ||
644 | { | ||
645 | might_sleep(); | ||
646 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, | ||
647 | subclass, NULL, _RET_IP_, NULL, 0); | ||
648 | } | ||
649 | |||
650 | EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); | ||
651 | |||
652 | static inline int | ||
653 | ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | ||
654 | { | ||
655 | #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH | ||
656 | unsigned tmp; | ||
657 | |||
658 | if (ctx->deadlock_inject_countdown-- == 0) { | ||
659 | tmp = ctx->deadlock_inject_interval; | ||
660 | if (tmp > UINT_MAX/4) | ||
661 | tmp = UINT_MAX; | ||
662 | else | ||
663 | tmp = tmp*2 + tmp + tmp/2; | ||
664 | |||
665 | ctx->deadlock_inject_interval = tmp; | ||
666 | ctx->deadlock_inject_countdown = tmp; | ||
667 | ctx->contending_lock = lock; | ||
668 | |||
669 | ww_mutex_unlock(lock); | ||
670 | |||
671 | return -EDEADLK; | ||
672 | } | ||
673 | #endif | ||
674 | |||
675 | return 0; | ||
676 | } | ||
677 | |||
678 | int __sched | ||
679 | __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | ||
680 | { | ||
681 | int ret; | ||
682 | |||
683 | might_sleep(); | ||
684 | ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, | ||
685 | 0, &ctx->dep_map, _RET_IP_, ctx, 1); | ||
686 | if (!ret && ctx->acquired > 1) | ||
687 | return ww_mutex_deadlock_injection(lock, ctx); | ||
688 | |||
689 | return ret; | ||
690 | } | ||
691 | EXPORT_SYMBOL_GPL(__ww_mutex_lock); | ||
692 | |||
693 | int __sched | ||
694 | __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | ||
695 | { | ||
696 | int ret; | ||
697 | |||
698 | might_sleep(); | ||
699 | ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, | ||
700 | 0, &ctx->dep_map, _RET_IP_, ctx, 1); | ||
701 | |||
702 | if (!ret && ctx->acquired > 1) | ||
703 | return ww_mutex_deadlock_injection(lock, ctx); | ||
704 | |||
705 | return ret; | ||
706 | } | ||
707 | EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); | ||
708 | |||
709 | #endif | ||
710 | |||
711 | /* | ||
712 | * Release the lock, slowpath: | ||
713 | */ | ||
714 | static inline void | ||
715 | __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | ||
716 | { | ||
717 | struct mutex *lock = container_of(lock_count, struct mutex, count); | ||
718 | unsigned long flags; | ||
719 | |||
720 | spin_lock_mutex(&lock->wait_lock, flags); | ||
721 | mutex_release(&lock->dep_map, nested, _RET_IP_); | ||
722 | debug_mutex_unlock(lock); | ||
723 | |||
724 | /* | ||
725 | * some architectures leave the lock unlocked in the fastpath failure | ||
726 | * case; others need to leave it locked. In the latter case we have to | ||
727 | * unlock it here | ||
728 | */ | ||
729 | if (__mutex_slowpath_needs_to_unlock()) | ||
730 | atomic_set(&lock->count, 1); | ||
731 | |||
732 | if (!list_empty(&lock->wait_list)) { | ||
733 | /* get the first entry from the wait-list: */ | ||
734 | struct mutex_waiter *waiter = | ||
735 | list_entry(lock->wait_list.next, | ||
736 | struct mutex_waiter, list); | ||
737 | |||
738 | debug_mutex_wake_waiter(lock, waiter); | ||
739 | |||
740 | wake_up_process(waiter->task); | ||
741 | } | ||
742 | |||
743 | spin_unlock_mutex(&lock->wait_lock, flags); | ||
744 | } | ||
745 | |||
746 | /* | ||
747 | * Release the lock, slowpath: | ||
748 | */ | ||
749 | static __used noinline void | ||
750 | __mutex_unlock_slowpath(atomic_t *lock_count) | ||
751 | { | ||
752 | __mutex_unlock_common_slowpath(lock_count, 1); | ||
753 | } | ||
754 | |||
755 | #ifndef CONFIG_DEBUG_LOCK_ALLOC | ||
756 | /* | ||
757 | * Here come the less common (and hence less performance-critical) APIs: | ||
758 | * mutex_lock_interruptible() and mutex_trylock(). | ||
759 | */ | ||
760 | static noinline int __sched | ||
761 | __mutex_lock_killable_slowpath(struct mutex *lock); | ||
762 | |||
763 | static noinline int __sched | ||
764 | __mutex_lock_interruptible_slowpath(struct mutex *lock); | ||
765 | |||
766 | /** | ||
767 | * mutex_lock_interruptible - acquire the mutex, interruptible | ||
768 | * @lock: the mutex to be acquired | ||
769 | * | ||
770 | * Lock the mutex like mutex_lock(), and return 0 if the mutex has | ||
771 | * been acquired or sleep until the mutex becomes available. If a | ||
772 | * signal arrives while waiting for the lock then this function | ||
773 | * returns -EINTR. | ||
774 | * | ||
775 | * This function is similar to (but not equivalent to) down_interruptible(). | ||
776 | */ | ||
777 | int __sched mutex_lock_interruptible(struct mutex *lock) | ||
778 | { | ||
779 | int ret; | ||
780 | |||
781 | might_sleep(); | ||
782 | ret = __mutex_fastpath_lock_retval(&lock->count); | ||
783 | if (likely(!ret)) { | ||
784 | mutex_set_owner(lock); | ||
785 | return 0; | ||
786 | } else | ||
787 | return __mutex_lock_interruptible_slowpath(lock); | ||
788 | } | ||
789 | |||
790 | EXPORT_SYMBOL(mutex_lock_interruptible); | ||
791 | |||
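As the kernel-doc above notes, mutex_lock_interruptible() returns -EINTR when a signal arrives during the wait, so callers typically bail out and propagate an error. A hypothetical sketch — dev_lock and demo_op() are illustrative names:

```c
/* Hypothetical interruptible caller; dev_lock and demo_op() are illustrative. */
#include <linux/errno.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(dev_lock);

static long demo_op(void)
{
	if (mutex_lock_interruptible(&dev_lock))
		return -ERESTARTSYS;	/* -EINTR from the lock: let the signal be handled */

	/* ... critical section under dev_lock ... */

	mutex_unlock(&dev_lock);
	return 0;
}
```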
792 | int __sched mutex_lock_killable(struct mutex *lock) | ||
793 | { | ||
794 | int ret; | ||
795 | |||
796 | might_sleep(); | ||
797 | ret = __mutex_fastpath_lock_retval(&lock->count); | ||
798 | if (likely(!ret)) { | ||
799 | mutex_set_owner(lock); | ||
800 | return 0; | ||
801 | } else | ||
802 | return __mutex_lock_killable_slowpath(lock); | ||
803 | } | ||
804 | EXPORT_SYMBOL(mutex_lock_killable); | ||
805 | |||
806 | static __used noinline void __sched | ||
807 | __mutex_lock_slowpath(atomic_t *lock_count) | ||
808 | { | ||
809 | struct mutex *lock = container_of(lock_count, struct mutex, count); | ||
810 | |||
811 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, | ||
812 | NULL, _RET_IP_, NULL, 0); | ||
813 | } | ||
814 | |||
815 | static noinline int __sched | ||
816 | __mutex_lock_killable_slowpath(struct mutex *lock) | ||
817 | { | ||
818 | return __mutex_lock_common(lock, TASK_KILLABLE, 0, | ||
819 | NULL, _RET_IP_, NULL, 0); | ||
820 | } | ||
821 | |||
822 | static noinline int __sched | ||
823 | __mutex_lock_interruptible_slowpath(struct mutex *lock) | ||
824 | { | ||
825 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, | ||
826 | NULL, _RET_IP_, NULL, 0); | ||
827 | } | ||
828 | |||
829 | static noinline int __sched | ||
830 | __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | ||
831 | { | ||
832 | return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0, | ||
833 | NULL, _RET_IP_, ctx, 1); | ||
834 | } | ||
835 | |||
836 | static noinline int __sched | ||
837 | __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, | ||
838 | struct ww_acquire_ctx *ctx) | ||
839 | { | ||
840 | return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0, | ||
841 | NULL, _RET_IP_, ctx, 1); | ||
842 | } | ||
843 | |||
844 | #endif | ||
845 | |||
846 | /* | ||
847 | * Spinlock based trylock, we take the spinlock and check whether we | ||
848 | * can get the lock: | ||
849 | */ | ||
850 | static inline int __mutex_trylock_slowpath(atomic_t *lock_count) | ||
851 | { | ||
852 | struct mutex *lock = container_of(lock_count, struct mutex, count); | ||
853 | unsigned long flags; | ||
854 | int prev; | ||
855 | |||
856 | spin_lock_mutex(&lock->wait_lock, flags); | ||
857 | |||
858 | prev = atomic_xchg(&lock->count, -1); | ||
859 | if (likely(prev == 1)) { | ||
860 | mutex_set_owner(lock); | ||
861 | mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); | ||
862 | } | ||
863 | |||
864 | /* Set it back to 0 if there are no waiters: */ | ||
865 | if (likely(list_empty(&lock->wait_list))) | ||
866 | atomic_set(&lock->count, 0); | ||
867 | |||
868 | spin_unlock_mutex(&lock->wait_lock, flags); | ||
869 | |||
870 | return prev == 1; | ||
871 | } | ||
872 | |||
873 | /** | ||
874 | * mutex_trylock - try to acquire the mutex, without waiting | ||
875 | * @lock: the mutex to be acquired | ||
876 | * | ||
877 | * Try to acquire the mutex atomically. Returns 1 if the mutex | ||
878 | * has been acquired successfully, and 0 on contention. | ||
879 | * | ||
880 | * NOTE: this function follows the spin_trylock() convention, so | ||
881 | * it is negated from the down_trylock() return values! Be careful | ||
882 | * about this when converting semaphore users to mutexes. | ||
883 | * | ||
884 | * This function must not be used in interrupt context. The | ||
885 | * mutex must be released by the same task that acquired it. | ||
886 | */ | ||
887 | int __sched mutex_trylock(struct mutex *lock) | ||
888 | { | ||
889 | int ret; | ||
890 | |||
891 | ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); | ||
892 | if (ret) | ||
893 | mutex_set_owner(lock); | ||
894 | |||
895 | return ret; | ||
896 | } | ||
897 | EXPORT_SYMBOL(mutex_trylock); | ||
898 | |||
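Because mutex_trylock() follows the spin_trylock() convention noted in the kernel-doc above (1 on success, 0 on contention — the opposite of down_trylock()), callers branch on a non-zero return. A hypothetical sketch — stats_lock and stats_try_flush() are illustrative:

```c
/* Hypothetical trylock user; stats_lock and stats_try_flush() are illustrative. */
#include <linux/mutex.h>
#include <linux/types.h>

static DEFINE_MUTEX(stats_lock);

static bool stats_try_flush(void)
{
	if (!mutex_trylock(&stats_lock))
		return false;	/* 0 means contended: we did NOT get the lock */

	/* ... flush the statistics protected by stats_lock ... */

	mutex_unlock(&stats_lock);
	return true;		/* 1 from mutex_trylock() meant the lock was taken */
}
```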
899 | #ifndef CONFIG_DEBUG_LOCK_ALLOC | ||
900 | int __sched | ||
901 | __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | ||
902 | { | ||
903 | int ret; | ||
904 | |||
905 | might_sleep(); | ||
906 | |||
907 | ret = __mutex_fastpath_lock_retval(&lock->base.count); | ||
908 | |||
909 | if (likely(!ret)) { | ||
910 | ww_mutex_set_context_fastpath(lock, ctx); | ||
911 | mutex_set_owner(&lock->base); | ||
912 | } else | ||
913 | ret = __ww_mutex_lock_slowpath(lock, ctx); | ||
914 | return ret; | ||
915 | } | ||
916 | EXPORT_SYMBOL(__ww_mutex_lock); | ||
917 | |||
918 | int __sched | ||
919 | __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | ||
920 | { | ||
921 | int ret; | ||
922 | |||
923 | might_sleep(); | ||
924 | |||
925 | ret = __mutex_fastpath_lock_retval(&lock->base.count); | ||
926 | |||
927 | if (likely(!ret)) { | ||
928 | ww_mutex_set_context_fastpath(lock, ctx); | ||
929 | mutex_set_owner(&lock->base); | ||
930 | } else | ||
931 | ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx); | ||
932 | return ret; | ||
933 | } | ||
934 | EXPORT_SYMBOL(__ww_mutex_lock_interruptible); | ||
935 | |||
936 | #endif | ||
937 | |||
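For context, the __ww_mutex_lock*() entry points above sit behind the public ww_mutex API. A minimal, hedged sketch of how a caller is expected to drive that API follows; the class, locks and helper are illustrative names, not taken from this file, and the -EDEADLK backoff is abbreviated (a real caller would use ww_mutex_lock_slow() and retry).

/* Sketch: my_ww_class / my_lock_a / my_lock_b are made-up names.   */
/* Assume ww_mutex_init(&my_lock_a, &my_ww_class) etc. ran earlier. */
static DEFINE_WW_CLASS(my_ww_class);
static struct ww_mutex my_lock_a, my_lock_b;

static int my_lock_both(void)
{
	struct ww_acquire_ctx ctx;
	int ret;

	ww_acquire_init(&ctx, &my_ww_class);

	ret = ww_mutex_lock(&my_lock_a, &ctx);
	if (ret)				/* -EDEADLK: would back off and retry */
		goto out;

	ret = ww_mutex_lock(&my_lock_b, &ctx);
	if (ret) {				/* -EDEADLK: drop a, then lock_slow b and retry */
		ww_mutex_unlock(&my_lock_a);
		goto out;
	}

	ww_acquire_done(&ctx);
	/* ... use both locks ... */
	ww_mutex_unlock(&my_lock_b);
	ww_mutex_unlock(&my_lock_a);
out:
	ww_acquire_fini(&ctx);
	return ret;
}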
938 | /** | ||
939 | * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 | ||
940 | * @cnt: the atomic which we are to dec | ||
941 | * @lock: the mutex to return holding if we dec to 0 | ||
942 | * | ||
943 | * return true and hold lock if we dec to 0, return false otherwise | ||
944 | */ | ||
945 | int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | ||
946 | { | ||
947 | /* dec if we can't possibly hit 0 */ | ||
948 | if (atomic_add_unless(cnt, -1, 1)) | ||
949 | return 0; | ||
950 | /* we might hit 0, so take the lock */ | ||
951 | mutex_lock(lock); | ||
952 | if (!atomic_dec_and_test(cnt)) { | ||
953 | /* when we actually did the dec, we didn't hit 0 */ | ||
954 | mutex_unlock(lock); | ||
955 | return 0; | ||
956 | } | ||
957 | /* we hit 0, and we hold the lock */ | ||
958 | return 1; | ||
959 | } | ||
960 | EXPORT_SYMBOL(atomic_dec_and_mutex_lock); | ||
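A typical use of atomic_dec_and_mutex_lock() is a reference count whose final drop must happen under a mutex. The sketch below only illustrates that pattern; the names are hypothetical.

/* Sketch: my_refs / my_cleanup_lock are made-up names. */
static atomic_t my_refs = ATOMIC_INIT(1);
static DEFINE_MUTEX(my_cleanup_lock);

static void my_put(void)
{
	if (atomic_dec_and_mutex_lock(&my_refs, &my_cleanup_lock)) {
		/* we dropped the last reference and now hold the mutex */
		/* ... tear down shared state ... */
		mutex_unlock(&my_cleanup_lock);
	}
}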
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h new file mode 100644 index 000000000000..4115fbf83b12 --- /dev/null +++ b/kernel/locking/mutex.h | |||
@@ -0,0 +1,48 @@ | |||
1 | /* | ||
2 | * Mutexes: blocking mutual exclusion locks | ||
3 | * | ||
4 | * started by Ingo Molnar: | ||
5 | * | ||
6 | * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
7 | * | ||
8 | * This file contains mutex debugging related internal prototypes, for the | ||
9 | * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: | ||
10 | */ | ||
11 | |||
12 | #define spin_lock_mutex(lock, flags) \ | ||
13 | do { spin_lock(lock); (void)(flags); } while (0) | ||
14 | #define spin_unlock_mutex(lock, flags) \ | ||
15 | do { spin_unlock(lock); (void)(flags); } while (0) | ||
16 | #define mutex_remove_waiter(lock, waiter, ti) \ | ||
17 | __list_del((waiter)->list.prev, (waiter)->list.next) | ||
18 | |||
19 | #ifdef CONFIG_SMP | ||
20 | static inline void mutex_set_owner(struct mutex *lock) | ||
21 | { | ||
22 | lock->owner = current; | ||
23 | } | ||
24 | |||
25 | static inline void mutex_clear_owner(struct mutex *lock) | ||
26 | { | ||
27 | lock->owner = NULL; | ||
28 | } | ||
29 | #else | ||
30 | static inline void mutex_set_owner(struct mutex *lock) | ||
31 | { | ||
32 | } | ||
33 | |||
34 | static inline void mutex_clear_owner(struct mutex *lock) | ||
35 | { | ||
36 | } | ||
37 | #endif | ||
38 | |||
39 | #define debug_mutex_wake_waiter(lock, waiter) do { } while (0) | ||
40 | #define debug_mutex_free_waiter(waiter) do { } while (0) | ||
41 | #define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) | ||
42 | #define debug_mutex_unlock(lock) do { } while (0) | ||
43 | #define debug_mutex_init(lock, name, key) do { } while (0) | ||
44 | |||
45 | static inline void | ||
46 | debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) | ||
47 | { | ||
48 | } | ||
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c new file mode 100644 index 000000000000..652a8ee8efe9 --- /dev/null +++ b/kernel/locking/percpu-rwsem.c | |||
@@ -0,0 +1,165 @@ | |||
1 | #include <linux/atomic.h> | ||
2 | #include <linux/rwsem.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/wait.h> | ||
5 | #include <linux/lockdep.h> | ||
6 | #include <linux/percpu-rwsem.h> | ||
7 | #include <linux/rcupdate.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/errno.h> | ||
10 | |||
11 | int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, | ||
12 | const char *name, struct lock_class_key *rwsem_key) | ||
13 | { | ||
14 | brw->fast_read_ctr = alloc_percpu(int); | ||
15 | if (unlikely(!brw->fast_read_ctr)) | ||
16 | return -ENOMEM; | ||
17 | |||
18 | /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ | ||
19 | __init_rwsem(&brw->rw_sem, name, rwsem_key); | ||
20 | atomic_set(&brw->write_ctr, 0); | ||
21 | atomic_set(&brw->slow_read_ctr, 0); | ||
22 | init_waitqueue_head(&brw->write_waitq); | ||
23 | return 0; | ||
24 | } | ||
25 | |||
26 | void percpu_free_rwsem(struct percpu_rw_semaphore *brw) | ||
27 | { | ||
28 | free_percpu(brw->fast_read_ctr); | ||
29 | brw->fast_read_ctr = NULL; /* catch use after free bugs */ | ||
30 | } | ||
31 | |||
32 | /* | ||
33 | * This is the fast-path for down_read/up_read, it only needs to ensure | ||
34 | * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the | ||
35 | * fast per-cpu counter. The writer uses synchronize_sched_expedited() to | ||
36 | * serialize with the preempt-disabled section below. | ||
37 | * | ||
38 | * The nontrivial part is that we should guarantee acquire/release semantics | ||
39 | * in the case when | ||
40 | * | ||
41 | * R_W: down_write() comes after up_read(), the writer should see all | ||
42 | * changes done by the reader | ||
43 | * or | ||
44 | * W_R: down_read() comes after up_write(), the reader should see all | ||
45 | * changes done by the writer | ||
46 | * | ||
47 | * If this helper fails the callers rely on the normal rw_semaphore and | ||
48 | * atomic_dec_and_test(), so in this case we have the necessary barriers. | ||
49 | * | ||
50 | * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or | ||
51 | * __this_cpu_add() below can be reordered with any LOAD/STORE done by the | ||
52 | * reader inside the critical section. See the comments in down_write and | ||
53 | * up_write below. | ||
54 | */ | ||
55 | static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) | ||
56 | { | ||
57 | bool success = false; | ||
58 | |||
59 | preempt_disable(); | ||
60 | if (likely(!atomic_read(&brw->write_ctr))) { | ||
61 | __this_cpu_add(*brw->fast_read_ctr, val); | ||
62 | success = true; | ||
63 | } | ||
64 | preempt_enable(); | ||
65 | |||
66 | return success; | ||
67 | } | ||
68 | |||
69 | /* | ||
70 | * Like the normal down_read() this is not recursive; the writer can | ||
71 | * come after the first percpu_down_read() and create a deadlock. | ||
72 | * | ||
73 | * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep, | ||
74 | * percpu_up_read() does rwsem_release(). This pairs with the usage | ||
75 | * of ->rw_sem in percpu_down/up_write(). | ||
76 | */ | ||
77 | void percpu_down_read(struct percpu_rw_semaphore *brw) | ||
78 | { | ||
79 | might_sleep(); | ||
80 | if (likely(update_fast_ctr(brw, +1))) { | ||
81 | rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); | ||
82 | return; | ||
83 | } | ||
84 | |||
85 | down_read(&brw->rw_sem); | ||
86 | atomic_inc(&brw->slow_read_ctr); | ||
87 | /* avoid up_read()->rwsem_release() */ | ||
88 | __up_read(&brw->rw_sem); | ||
89 | } | ||
90 | |||
91 | void percpu_up_read(struct percpu_rw_semaphore *brw) | ||
92 | { | ||
93 | rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); | ||
94 | |||
95 | if (likely(update_fast_ctr(brw, -1))) | ||
96 | return; | ||
97 | |||
98 | /* false-positive is possible but harmless */ | ||
99 | if (atomic_dec_and_test(&brw->slow_read_ctr)) | ||
100 | wake_up_all(&brw->write_waitq); | ||
101 | } | ||
102 | |||
103 | static int clear_fast_ctr(struct percpu_rw_semaphore *brw) | ||
104 | { | ||
105 | unsigned int sum = 0; | ||
106 | int cpu; | ||
107 | |||
108 | for_each_possible_cpu(cpu) { | ||
109 | sum += per_cpu(*brw->fast_read_ctr, cpu); | ||
110 | per_cpu(*brw->fast_read_ctr, cpu) = 0; | ||
111 | } | ||
112 | |||
113 | return sum; | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * A writer increments ->write_ctr to force the readers to switch to the | ||
118 | * slow mode, note the atomic_read() check in update_fast_ctr(). | ||
119 | * | ||
120 | * After that the readers can only inc/dec the slow ->slow_read_ctr counter, | ||
121 | * ->fast_read_ctr is stable. Once the writer moves its sum into the slow | ||
122 | * counter it represents the number of active readers. | ||
123 | * | ||
124 | * Finally the writer takes ->rw_sem for writing and blocks the new readers, | ||
125 | * then waits until the slow counter becomes zero. | ||
126 | */ | ||
127 | void percpu_down_write(struct percpu_rw_semaphore *brw) | ||
128 | { | ||
129 | /* tell update_fast_ctr() there is a pending writer */ | ||
130 | atomic_inc(&brw->write_ctr); | ||
131 | /* | ||
132 | * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read | ||
133 | * so that update_fast_ctr() can't succeed. | ||
134 | * | ||
135 | * 2. Ensures we see the result of every previous this_cpu_add() in | ||
136 | * update_fast_ctr(). | ||
137 | * | ||
138 | * 3. Ensures that if any reader has exited its critical section via | ||
139 | * fast-path, it executes a full memory barrier before we return. | ||
140 | * See R_W case in the comment above update_fast_ctr(). | ||
141 | */ | ||
142 | synchronize_sched_expedited(); | ||
143 | |||
144 | /* exclude other writers, and block the new readers completely */ | ||
145 | down_write(&brw->rw_sem); | ||
146 | |||
147 | /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ | ||
148 | atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); | ||
149 | |||
150 | /* wait for all readers to complete their percpu_up_read() */ | ||
151 | wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); | ||
152 | } | ||
153 | |||
154 | void percpu_up_write(struct percpu_rw_semaphore *brw) | ||
155 | { | ||
156 | /* release the lock, but the readers can't use the fast-path */ | ||
157 | up_write(&brw->rw_sem); | ||
158 | /* | ||
159 | * Insert the barrier before the next fast-path in down_read, | ||
160 | * see W_R case in the comment above update_fast_ctr(). | ||
161 | */ | ||
162 | synchronize_sched_expedited(); | ||
163 | /* the last writer unblocks update_fast_ctr() */ | ||
164 | atomic_dec(&brw->write_ctr); | ||
165 | } | ||
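Taken together, the per-cpu reader fast path and the expedited-RCU based writer give the usage pattern sketched below. The semaphore name is hypothetical; init and teardown are shown only for completeness.

/* Sketch: my_brw is a made-up percpu_rw_semaphore. */
static struct percpu_rw_semaphore my_brw;

static int my_init(void)
{
	return percpu_init_rwsem(&my_brw);	/* -ENOMEM on failure */
}

static void my_reader(void)
{
	percpu_down_read(&my_brw);	/* per-cpu fast path if no writer pending */
	/* ... read-side section ... */
	percpu_up_read(&my_brw);
}

static void my_writer(void)
{
	percpu_down_write(&my_brw);	/* heavyweight: forces readers to the slow path */
	/* ... exclusive section ... */
	percpu_up_write(&my_brw);
}

static void my_exit(void)
{
	percpu_free_rwsem(&my_brw);
}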
diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c new file mode 100644 index 000000000000..13b243a323fa --- /dev/null +++ b/kernel/locking/rtmutex-debug.c | |||
@@ -0,0 +1,187 @@ | |||
1 | /* | ||
2 | * RT-Mutexes: blocking mutual exclusion locks with PI support | ||
3 | * | ||
4 | * started by Ingo Molnar and Thomas Gleixner: | ||
5 | * | ||
6 | * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
7 | * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | ||
8 | * | ||
9 | * This code is based on the rt.c implementation in the preempt-rt tree. | ||
10 | * Portions of said code are | ||
11 | * | ||
12 | * Copyright (C) 2004 LynuxWorks, Inc., Igor Manyilov, Bill Huey | ||
13 | * Copyright (C) 2006 Esben Nielsen | ||
14 | * Copyright (C) 2006 Kihon Technologies Inc., | ||
15 | * Steven Rostedt <rostedt@goodmis.org> | ||
16 | * | ||
17 | * See rt.c in preempt-rt for proper credits and further information | ||
18 | */ | ||
19 | #include <linux/sched.h> | ||
20 | #include <linux/sched/rt.h> | ||
21 | #include <linux/delay.h> | ||
22 | #include <linux/export.h> | ||
23 | #include <linux/spinlock.h> | ||
24 | #include <linux/kallsyms.h> | ||
25 | #include <linux/syscalls.h> | ||
26 | #include <linux/interrupt.h> | ||
27 | #include <linux/plist.h> | ||
28 | #include <linux/fs.h> | ||
29 | #include <linux/debug_locks.h> | ||
30 | |||
31 | #include "rtmutex_common.h" | ||
32 | |||
33 | static void printk_task(struct task_struct *p) | ||
34 | { | ||
35 | if (p) | ||
36 | printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio); | ||
37 | else | ||
38 | printk("<none>"); | ||
39 | } | ||
40 | |||
41 | static void printk_lock(struct rt_mutex *lock, int print_owner) | ||
42 | { | ||
43 | if (lock->name) | ||
44 | printk(" [%p] {%s}\n", | ||
45 | lock, lock->name); | ||
46 | else | ||
47 | printk(" [%p] {%s:%d}\n", | ||
48 | lock, lock->file, lock->line); | ||
49 | |||
50 | if (print_owner && rt_mutex_owner(lock)) { | ||
51 | printk(".. ->owner: %p\n", lock->owner); | ||
52 | printk(".. held by: "); | ||
53 | printk_task(rt_mutex_owner(lock)); | ||
54 | printk("\n"); | ||
55 | } | ||
56 | } | ||
57 | |||
58 | void rt_mutex_debug_task_free(struct task_struct *task) | ||
59 | { | ||
60 | DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters)); | ||
61 | DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * We fill out the fields in the waiter to store the information about | ||
66 | * the deadlock. We print when we return. act_waiter can be NULL in | ||
67 | * case of a remove waiter operation. | ||
68 | */ | ||
69 | void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter, | ||
70 | struct rt_mutex *lock) | ||
71 | { | ||
72 | struct task_struct *task; | ||
73 | |||
74 | if (!debug_locks || detect || !act_waiter) | ||
75 | return; | ||
76 | |||
77 | task = rt_mutex_owner(act_waiter->lock); | ||
78 | if (task && task != current) { | ||
79 | act_waiter->deadlock_task_pid = get_pid(task_pid(task)); | ||
80 | act_waiter->deadlock_lock = lock; | ||
81 | } | ||
82 | } | ||
83 | |||
84 | void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) | ||
85 | { | ||
86 | struct task_struct *task; | ||
87 | |||
88 | if (!waiter->deadlock_lock || !debug_locks) | ||
89 | return; | ||
90 | |||
91 | rcu_read_lock(); | ||
92 | task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID); | ||
93 | if (!task) { | ||
94 | rcu_read_unlock(); | ||
95 | return; | ||
96 | } | ||
97 | |||
98 | if (!debug_locks_off()) { | ||
99 | rcu_read_unlock(); | ||
100 | return; | ||
101 | } | ||
102 | |||
103 | printk("\n============================================\n"); | ||
104 | printk( "[ BUG: circular locking deadlock detected! ]\n"); | ||
105 | printk("%s\n", print_tainted()); | ||
106 | printk( "--------------------------------------------\n"); | ||
107 | printk("%s/%d is deadlocking current task %s/%d\n\n", | ||
108 | task->comm, task_pid_nr(task), | ||
109 | current->comm, task_pid_nr(current)); | ||
110 | |||
111 | printk("\n1) %s/%d is trying to acquire this lock:\n", | ||
112 | current->comm, task_pid_nr(current)); | ||
113 | printk_lock(waiter->lock, 1); | ||
114 | |||
115 | printk("\n2) %s/%d is blocked on this lock:\n", | ||
116 | task->comm, task_pid_nr(task)); | ||
117 | printk_lock(waiter->deadlock_lock, 1); | ||
118 | |||
119 | debug_show_held_locks(current); | ||
120 | debug_show_held_locks(task); | ||
121 | |||
122 | printk("\n%s/%d's [blocked] stackdump:\n\n", | ||
123 | task->comm, task_pid_nr(task)); | ||
124 | show_stack(task, NULL); | ||
125 | printk("\n%s/%d's [current] stackdump:\n\n", | ||
126 | current->comm, task_pid_nr(current)); | ||
127 | dump_stack(); | ||
128 | debug_show_all_locks(); | ||
129 | rcu_read_unlock(); | ||
130 | |||
131 | printk("[ turning off deadlock detection." | ||
132 | "Please report this trace. ]\n\n"); | ||
133 | } | ||
134 | |||
135 | void debug_rt_mutex_lock(struct rt_mutex *lock) | ||
136 | { | ||
137 | } | ||
138 | |||
139 | void debug_rt_mutex_unlock(struct rt_mutex *lock) | ||
140 | { | ||
141 | DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current); | ||
142 | } | ||
143 | |||
144 | void | ||
145 | debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner) | ||
146 | { | ||
147 | } | ||
148 | |||
149 | void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) | ||
150 | { | ||
151 | DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock)); | ||
152 | } | ||
153 | |||
154 | void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) | ||
155 | { | ||
156 | memset(waiter, 0x11, sizeof(*waiter)); | ||
157 | plist_node_init(&waiter->list_entry, MAX_PRIO); | ||
158 | plist_node_init(&waiter->pi_list_entry, MAX_PRIO); | ||
159 | waiter->deadlock_task_pid = NULL; | ||
160 | } | ||
161 | |||
162 | void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) | ||
163 | { | ||
164 | put_pid(waiter->deadlock_task_pid); | ||
165 | DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry)); | ||
166 | DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); | ||
167 | memset(waiter, 0x22, sizeof(*waiter)); | ||
168 | } | ||
169 | |||
170 | void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) | ||
171 | { | ||
172 | /* | ||
173 | * Make sure we are not reinitializing a held lock: | ||
174 | */ | ||
175 | debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | ||
176 | lock->name = name; | ||
177 | } | ||
178 | |||
179 | void | ||
180 | rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) | ||
181 | { | ||
182 | } | ||
183 | |||
184 | void rt_mutex_deadlock_account_unlock(struct task_struct *task) | ||
185 | { | ||
186 | } | ||
187 | |||
diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h new file mode 100644 index 000000000000..14193d596d78 --- /dev/null +++ b/kernel/locking/rtmutex-debug.h | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | * RT-Mutexes: blocking mutual exclusion locks with PI support | ||
3 | * | ||
4 | * started by Ingo Molnar and Thomas Gleixner: | ||
5 | * | ||
6 | * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
7 | * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | ||
8 | * | ||
9 | * This file contains macros used solely by rtmutex.c. Debug version. | ||
10 | */ | ||
11 | |||
12 | extern void | ||
13 | rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); | ||
14 | extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); | ||
15 | extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); | ||
16 | extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); | ||
17 | extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); | ||
18 | extern void debug_rt_mutex_lock(struct rt_mutex *lock); | ||
19 | extern void debug_rt_mutex_unlock(struct rt_mutex *lock); | ||
20 | extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, | ||
21 | struct task_struct *powner); | ||
22 | extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); | ||
23 | extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, | ||
24 | struct rt_mutex *lock); | ||
25 | extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter); | ||
26 | # define debug_rt_mutex_reset_waiter(w) \ | ||
27 | do { (w)->deadlock_lock = NULL; } while (0) | ||
28 | |||
29 | static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, | ||
30 | int detect) | ||
31 | { | ||
32 | return (waiter != NULL); | ||
33 | } | ||
diff --git a/kernel/locking/rtmutex-tester.c b/kernel/locking/rtmutex-tester.c new file mode 100644 index 000000000000..1d96dd0d93c1 --- /dev/null +++ b/kernel/locking/rtmutex-tester.c | |||
@@ -0,0 +1,420 @@ | |||
1 | /* | ||
2 | * RT-Mutex-tester: scriptable tester for rt mutexes | ||
3 | * | ||
4 | * started by Thomas Gleixner: | ||
5 | * | ||
6 | * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | ||
7 | * | ||
8 | */ | ||
9 | #include <linux/device.h> | ||
10 | #include <linux/kthread.h> | ||
11 | #include <linux/export.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/sched/rt.h> | ||
14 | #include <linux/spinlock.h> | ||
15 | #include <linux/timer.h> | ||
16 | #include <linux/freezer.h> | ||
17 | #include <linux/stat.h> | ||
18 | |||
19 | #include "rtmutex.h" | ||
20 | |||
21 | #define MAX_RT_TEST_THREADS 8 | ||
22 | #define MAX_RT_TEST_MUTEXES 8 | ||
23 | |||
24 | static spinlock_t rttest_lock; | ||
25 | static atomic_t rttest_event; | ||
26 | |||
27 | struct test_thread_data { | ||
28 | int opcode; | ||
29 | int opdata; | ||
30 | int mutexes[MAX_RT_TEST_MUTEXES]; | ||
31 | int event; | ||
32 | struct device dev; | ||
33 | }; | ||
34 | |||
35 | static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; | ||
36 | static struct task_struct *threads[MAX_RT_TEST_THREADS]; | ||
37 | static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; | ||
38 | |||
39 | enum test_opcodes { | ||
40 | RTTEST_NOP = 0, | ||
41 | RTTEST_SCHEDOT, /* 1 Sched other, data = nice */ | ||
42 | RTTEST_SCHEDRT, /* 2 Sched fifo, data = prio */ | ||
43 | RTTEST_LOCK, /* 3 Lock uninterruptible, data = lockindex */ | ||
44 | RTTEST_LOCKNOWAIT, /* 4 Lock uninterruptible no wait in wakeup, data = lockindex */ | ||
45 | RTTEST_LOCKINT, /* 5 Lock interruptible, data = lockindex */ | ||
46 | RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */ | ||
47 | RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */ | ||
48 | RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */ | ||
49 | /* 9, 10 - reserved for BKL commemoration */ | ||
50 | RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */ | ||
51 | RTTEST_RESETEVENT = 98, /* 98 Reset event counter */ | ||
52 | RTTEST_RESET = 99, /* 99 Reset all pending operations */ | ||
53 | }; | ||
54 | |||
55 | static int handle_op(struct test_thread_data *td, int lockwakeup) | ||
56 | { | ||
57 | int i, id, ret = -EINVAL; | ||
58 | |||
59 | switch(td->opcode) { | ||
60 | |||
61 | case RTTEST_NOP: | ||
62 | return 0; | ||
63 | |||
64 | case RTTEST_LOCKCONT: | ||
65 | td->mutexes[td->opdata] = 1; | ||
66 | td->event = atomic_add_return(1, &rttest_event); | ||
67 | return 0; | ||
68 | |||
69 | case RTTEST_RESET: | ||
70 | for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) { | ||
71 | if (td->mutexes[i] == 4) { | ||
72 | rt_mutex_unlock(&mutexes[i]); | ||
73 | td->mutexes[i] = 0; | ||
74 | } | ||
75 | } | ||
76 | return 0; | ||
77 | |||
78 | case RTTEST_RESETEVENT: | ||
79 | atomic_set(&rttest_event, 0); | ||
80 | return 0; | ||
81 | |||
82 | default: | ||
83 | if (lockwakeup) | ||
84 | return ret; | ||
85 | } | ||
86 | |||
87 | switch(td->opcode) { | ||
88 | |||
89 | case RTTEST_LOCK: | ||
90 | case RTTEST_LOCKNOWAIT: | ||
91 | id = td->opdata; | ||
92 | if (id < 0 || id >= MAX_RT_TEST_MUTEXES) | ||
93 | return ret; | ||
94 | |||
95 | td->mutexes[id] = 1; | ||
96 | td->event = atomic_add_return(1, &rttest_event); | ||
97 | rt_mutex_lock(&mutexes[id]); | ||
98 | td->event = atomic_add_return(1, &rttest_event); | ||
99 | td->mutexes[id] = 4; | ||
100 | return 0; | ||
101 | |||
102 | case RTTEST_LOCKINT: | ||
103 | case RTTEST_LOCKINTNOWAIT: | ||
104 | id = td->opdata; | ||
105 | if (id < 0 || id >= MAX_RT_TEST_MUTEXES) | ||
106 | return ret; | ||
107 | |||
108 | td->mutexes[id] = 1; | ||
109 | td->event = atomic_add_return(1, &rttest_event); | ||
110 | ret = rt_mutex_lock_interruptible(&mutexes[id], 0); | ||
111 | td->event = atomic_add_return(1, &rttest_event); | ||
112 | td->mutexes[id] = ret ? 0 : 4; | ||
113 | return ret ? -EINTR : 0; | ||
114 | |||
115 | case RTTEST_UNLOCK: | ||
116 | id = td->opdata; | ||
117 | if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4) | ||
118 | return ret; | ||
119 | |||
120 | td->event = atomic_add_return(1, &rttest_event); | ||
121 | rt_mutex_unlock(&mutexes[id]); | ||
122 | td->event = atomic_add_return(1, &rttest_event); | ||
123 | td->mutexes[id] = 0; | ||
124 | return 0; | ||
125 | |||
126 | default: | ||
127 | break; | ||
128 | } | ||
129 | return ret; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Schedule replacement for rtsem_down(). Only called for threads with | ||
134 | * PF_MUTEX_TESTER set. | ||
135 | * | ||
136 | * This allows us to have fine-grained control over the event flow. | ||
137 | * | ||
138 | */ | ||
139 | void schedule_rt_mutex_test(struct rt_mutex *mutex) | ||
140 | { | ||
141 | int tid, op, dat; | ||
142 | struct test_thread_data *td; | ||
143 | |||
144 | /* We have to lookup the task */ | ||
145 | for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) { | ||
146 | if (threads[tid] == current) | ||
147 | break; | ||
148 | } | ||
149 | |||
150 | BUG_ON(tid == MAX_RT_TEST_THREADS); | ||
151 | |||
152 | td = &thread_data[tid]; | ||
153 | |||
154 | op = td->opcode; | ||
155 | dat = td->opdata; | ||
156 | |||
157 | switch (op) { | ||
158 | case RTTEST_LOCK: | ||
159 | case RTTEST_LOCKINT: | ||
160 | case RTTEST_LOCKNOWAIT: | ||
161 | case RTTEST_LOCKINTNOWAIT: | ||
162 | if (mutex != &mutexes[dat]) | ||
163 | break; | ||
164 | |||
165 | if (td->mutexes[dat] != 1) | ||
166 | break; | ||
167 | |||
168 | td->mutexes[dat] = 2; | ||
169 | td->event = atomic_add_return(1, &rttest_event); | ||
170 | break; | ||
171 | |||
172 | default: | ||
173 | break; | ||
174 | } | ||
175 | |||
176 | schedule(); | ||
177 | |||
178 | |||
179 | switch (op) { | ||
180 | case RTTEST_LOCK: | ||
181 | case RTTEST_LOCKINT: | ||
182 | if (mutex != &mutexes[dat]) | ||
183 | return; | ||
184 | |||
185 | if (td->mutexes[dat] != 2) | ||
186 | return; | ||
187 | |||
188 | td->mutexes[dat] = 3; | ||
189 | td->event = atomic_add_return(1, &rttest_event); | ||
190 | break; | ||
191 | |||
192 | case RTTEST_LOCKNOWAIT: | ||
193 | case RTTEST_LOCKINTNOWAIT: | ||
194 | if (mutex != &mutexes[dat]) | ||
195 | return; | ||
196 | |||
197 | if (td->mutexes[dat] != 2) | ||
198 | return; | ||
199 | |||
200 | td->mutexes[dat] = 1; | ||
201 | td->event = atomic_add_return(1, &rttest_event); | ||
202 | return; | ||
203 | |||
204 | default: | ||
205 | return; | ||
206 | } | ||
207 | |||
208 | td->opcode = 0; | ||
209 | |||
210 | for (;;) { | ||
211 | set_current_state(TASK_INTERRUPTIBLE); | ||
212 | |||
213 | if (td->opcode > 0) { | ||
214 | int ret; | ||
215 | |||
216 | set_current_state(TASK_RUNNING); | ||
217 | ret = handle_op(td, 1); | ||
218 | set_current_state(TASK_INTERRUPTIBLE); | ||
219 | if (td->opcode == RTTEST_LOCKCONT) | ||
220 | break; | ||
221 | td->opcode = ret; | ||
222 | } | ||
223 | |||
224 | /* Wait for the next command to be executed */ | ||
225 | schedule(); | ||
226 | } | ||
227 | |||
228 | /* Restore previous command and data */ | ||
229 | td->opcode = op; | ||
230 | td->opdata = dat; | ||
231 | } | ||
232 | |||
233 | static int test_func(void *data) | ||
234 | { | ||
235 | struct test_thread_data *td = data; | ||
236 | int ret; | ||
237 | |||
238 | current->flags |= PF_MUTEX_TESTER; | ||
239 | set_freezable(); | ||
240 | allow_signal(SIGHUP); | ||
241 | |||
242 | for(;;) { | ||
243 | |||
244 | set_current_state(TASK_INTERRUPTIBLE); | ||
245 | |||
246 | if (td->opcode > 0) { | ||
247 | set_current_state(TASK_RUNNING); | ||
248 | ret = handle_op(td, 0); | ||
249 | set_current_state(TASK_INTERRUPTIBLE); | ||
250 | td->opcode = ret; | ||
251 | } | ||
252 | |||
253 | /* Wait for the next command to be executed */ | ||
254 | schedule(); | ||
255 | try_to_freeze(); | ||
256 | |||
257 | if (signal_pending(current)) | ||
258 | flush_signals(current); | ||
259 | |||
260 | if(kthread_should_stop()) | ||
261 | break; | ||
262 | } | ||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | /** | ||
267 | * sysfs_test_command - interface for test commands | ||
268 | * @dev: thread reference | ||
269 | * @buf: command for actual step | ||
270 | * @count: length of buffer | ||
271 | * | ||
272 | * command syntax: | ||
273 | * | ||
274 | * opcode:data | ||
275 | */ | ||
276 | static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr, | ||
277 | const char *buf, size_t count) | ||
278 | { | ||
279 | struct sched_param schedpar; | ||
280 | struct test_thread_data *td; | ||
281 | char cmdbuf[32]; | ||
282 | int op, dat, tid, ret; | ||
283 | |||
284 | td = container_of(dev, struct test_thread_data, dev); | ||
285 | tid = td->dev.id; | ||
286 | |||
287 | /* strings from sysfs write are not 0 terminated! */ | ||
288 | if (count >= sizeof(cmdbuf)) | ||
289 | return -EINVAL; | ||
290 | |||
291 | /* strip off \n: */ | ||
292 | if (buf[count-1] == '\n') | ||
293 | count--; | ||
294 | if (count < 1) | ||
295 | return -EINVAL; | ||
296 | |||
297 | memcpy(cmdbuf, buf, count); | ||
298 | cmdbuf[count] = 0; | ||
299 | |||
300 | if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2) | ||
301 | return -EINVAL; | ||
302 | |||
303 | switch (op) { | ||
304 | case RTTEST_SCHEDOT: | ||
305 | schedpar.sched_priority = 0; | ||
306 | ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar); | ||
307 | if (ret) | ||
308 | return ret; | ||
309 | set_user_nice(current, 0); | ||
310 | break; | ||
311 | |||
312 | case RTTEST_SCHEDRT: | ||
313 | schedpar.sched_priority = dat; | ||
314 | ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar); | ||
315 | if (ret) | ||
316 | return ret; | ||
317 | break; | ||
318 | |||
319 | case RTTEST_SIGNAL: | ||
320 | send_sig(SIGHUP, threads[tid], 0); | ||
321 | break; | ||
322 | |||
323 | default: | ||
324 | if (td->opcode > 0) | ||
325 | return -EBUSY; | ||
326 | td->opdata = dat; | ||
327 | td->opcode = op; | ||
328 | wake_up_process(threads[tid]); | ||
329 | } | ||
330 | |||
331 | return count; | ||
332 | } | ||
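From user space, each test thread is driven by writing "opcode:data" strings to its command attribute (historically done by the Perl scripts under scripts/rt-tester). A minimal C sketch follows; the exact sysfs path is an assumption and may need adjusting.

/* User-space sketch; the sysfs path below is an assumption. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* RTTEST_LOCK (opcode 3) on mutex 0, sent to test thread 0 */
	int fd = open("/sys/devices/system/rttest/rttest0/command", O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, "3:0", 3) != 3) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}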
333 | |||
334 | /** | ||
335 | * sysfs_test_status - sysfs interface for rt tester | ||
336 | * @dev: thread to query | ||
337 | * @buf: char buffer to be filled with thread status info | ||
338 | */ | ||
339 | static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr, | ||
340 | char *buf) | ||
341 | { | ||
342 | struct test_thread_data *td; | ||
343 | struct task_struct *tsk; | ||
344 | char *curr = buf; | ||
345 | int i; | ||
346 | |||
347 | td = container_of(dev, struct test_thread_data, dev); | ||
348 | tsk = threads[td->dev.id]; | ||
349 | |||
350 | spin_lock(&rttest_lock); | ||
351 | |||
352 | curr += sprintf(curr, | ||
353 | "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:", | ||
354 | td->opcode, td->event, tsk->state, | ||
355 | (MAX_RT_PRIO - 1) - tsk->prio, | ||
356 | (MAX_RT_PRIO - 1) - tsk->normal_prio, | ||
357 | tsk->pi_blocked_on); | ||
358 | |||
359 | for (i = MAX_RT_TEST_MUTEXES - 1; i >= 0; i--) | ||
360 | curr += sprintf(curr, "%d", td->mutexes[i]); | ||
361 | |||
362 | spin_unlock(&rttest_lock); | ||
363 | |||
364 | curr += sprintf(curr, ", T: %p, R: %p\n", tsk, | ||
365 | mutexes[td->dev.id].owner); | ||
366 | |||
367 | return curr - buf; | ||
368 | } | ||
369 | |||
370 | static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL); | ||
371 | static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command); | ||
372 | |||
373 | static struct bus_type rttest_subsys = { | ||
374 | .name = "rttest", | ||
375 | .dev_name = "rttest", | ||
376 | }; | ||
377 | |||
378 | static int init_test_thread(int id) | ||
379 | { | ||
380 | thread_data[id].dev.bus = &rttest_subsys; | ||
381 | thread_data[id].dev.id = id; | ||
382 | |||
383 | threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id); | ||
384 | if (IS_ERR(threads[id])) | ||
385 | return PTR_ERR(threads[id]); | ||
386 | |||
387 | return device_register(&thread_data[id].dev); | ||
388 | } | ||
389 | |||
390 | static int init_rttest(void) | ||
391 | { | ||
392 | int ret, i; | ||
393 | |||
394 | spin_lock_init(&rttest_lock); | ||
395 | |||
396 | for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) | ||
397 | rt_mutex_init(&mutexes[i]); | ||
398 | |||
399 | ret = subsys_system_register(&rttest_subsys, NULL); | ||
400 | if (ret) | ||
401 | return ret; | ||
402 | |||
403 | for (i = 0; i < MAX_RT_TEST_THREADS; i++) { | ||
404 | ret = init_test_thread(i); | ||
405 | if (ret) | ||
406 | break; | ||
407 | ret = device_create_file(&thread_data[i].dev, &dev_attr_status); | ||
408 | if (ret) | ||
409 | break; | ||
410 | ret = device_create_file(&thread_data[i].dev, &dev_attr_command); | ||
411 | if (ret) | ||
412 | break; | ||
413 | } | ||
414 | |||
415 | printk("Initializing RT-Tester: %s\n", ret ? "Failed" : "OK" ); | ||
416 | |||
417 | return ret; | ||
418 | } | ||
419 | |||
420 | device_initcall(init_rttest); | ||
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c new file mode 100644 index 000000000000..0dd6aec1cb6a --- /dev/null +++ b/kernel/locking/rtmutex.c | |||
@@ -0,0 +1,1060 @@ | |||
1 | /* | ||
2 | * RT-Mutexes: simple blocking mutual exclusion locks with PI support | ||
3 | * | ||
4 | * started by Ingo Molnar and Thomas Gleixner. | ||
5 | * | ||
6 | * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
7 | * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | ||
8 | * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt | ||
9 | * Copyright (C) 2006 Esben Nielsen | ||
10 | * | ||
11 | * See Documentation/rt-mutex-design.txt for details. | ||
12 | */ | ||
13 | #include <linux/spinlock.h> | ||
14 | #include <linux/export.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/sched/rt.h> | ||
17 | #include <linux/timer.h> | ||
18 | |||
19 | #include "rtmutex_common.h" | ||
20 | |||
21 | /* | ||
22 | * lock->owner state tracking: | ||
23 | * | ||
24 | * lock->owner holds the task_struct pointer of the owner. Bit 0 | ||
25 | * is used to keep track of the "lock has waiters" state. | ||
26 | * | ||
27 | * owner bit0 | ||
28 | * NULL 0 lock is free (fast acquire possible) | ||
29 | * NULL 1 lock is free and has waiters and the top waiter | ||
30 | * is going to take the lock* | ||
31 | * taskpointer 0 lock is held (fast release possible) | ||
32 | * taskpointer 1 lock is held and has waiters** | ||
33 | * | ||
34 | * The fast atomic compare exchange based acquire and release is only | ||
35 | * possible when bit 0 of lock->owner is 0. | ||
36 | * | ||
37 | * (*) It also can be a transitional state when grabbing the lock | ||
38 | * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock, | ||
39 | * we need to set the bit0 before looking at the lock, and the owner may be | ||
40 | * NULL in this small time, hence this can be a transitional state. | ||
41 | * | ||
42 | * (**) There is a small time when bit 0 is set but there are no | ||
43 | * waiters. This can happen when grabbing the lock in the slow path. | ||
44 | * To prevent a cmpxchg of the owner releasing the lock, we need to | ||
45 | * set this bit before looking at the lock. | ||
46 | */ | ||
47 | |||
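The owner/bit-0 encoding described above is decoded by a helper in rtmutex_common.h, which is not part of this hunk. Roughly, assuming the usual RT_MUTEX_HAS_WAITERS == 1UL definition, it looks like the sketch below (illustrative only, not a copy of that header):

/* Illustrative decode only -- the real helper lives in rtmutex_common.h. */
static inline struct task_struct *example_rt_mutex_owner(struct rt_mutex *lock)
{
	/* mask off bit 0 ("has waiters") to recover the task pointer */
	return (struct task_struct *)
		((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}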
48 | static void | ||
49 | rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) | ||
50 | { | ||
51 | unsigned long val = (unsigned long)owner; | ||
52 | |||
53 | if (rt_mutex_has_waiters(lock)) | ||
54 | val |= RT_MUTEX_HAS_WAITERS; | ||
55 | |||
56 | lock->owner = (struct task_struct *)val; | ||
57 | } | ||
58 | |||
59 | static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) | ||
60 | { | ||
61 | lock->owner = (struct task_struct *) | ||
62 | ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); | ||
63 | } | ||
64 | |||
65 | static void fixup_rt_mutex_waiters(struct rt_mutex *lock) | ||
66 | { | ||
67 | if (!rt_mutex_has_waiters(lock)) | ||
68 | clear_rt_mutex_waiters(lock); | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * We can speed up the acquire/release, if the architecture | ||
73 | * supports cmpxchg and if there's no debugging state to be set up | ||
74 | */ | ||
75 | #if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) | ||
76 | # define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) | ||
77 | static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | ||
78 | { | ||
79 | unsigned long owner, *p = (unsigned long *) &lock->owner; | ||
80 | |||
81 | do { | ||
82 | owner = *p; | ||
83 | } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); | ||
84 | } | ||
85 | #else | ||
86 | # define rt_mutex_cmpxchg(l,c,n) (0) | ||
87 | static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | ||
88 | { | ||
89 | lock->owner = (struct task_struct *) | ||
90 | ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); | ||
91 | } | ||
92 | #endif | ||
93 | |||
94 | /* | ||
95 | * Calculate task priority from the waiter list priority | ||
96 | * | ||
97 | * Return task->normal_prio when the waiter list is empty or when | ||
98 | * the waiter is not allowed to do priority boosting | ||
99 | */ | ||
100 | int rt_mutex_getprio(struct task_struct *task) | ||
101 | { | ||
102 | if (likely(!task_has_pi_waiters(task))) | ||
103 | return task->normal_prio; | ||
104 | |||
105 | return min(task_top_pi_waiter(task)->pi_list_entry.prio, | ||
106 | task->normal_prio); | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * Adjust the priority of a task, after its pi_waiters got modified. | ||
111 | * | ||
112 | * This can be both boosting and unboosting. task->pi_lock must be held. | ||
113 | */ | ||
114 | static void __rt_mutex_adjust_prio(struct task_struct *task) | ||
115 | { | ||
116 | int prio = rt_mutex_getprio(task); | ||
117 | |||
118 | if (task->prio != prio) | ||
119 | rt_mutex_setprio(task, prio); | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * Adjust task priority (undo boosting). Called from the exit path of | ||
124 | * rt_mutex_slowunlock() and rt_mutex_slowlock(). | ||
125 | * | ||
126 | * (Note: We do this outside of the protection of lock->wait_lock to | ||
127 | * allow the lock to be taken while or before we readjust the priority | ||
128 | * of task. We do not use the spin_xx_mutex() variants here as we are | ||
129 | * outside of the debug path.) | ||
130 | */ | ||
131 | static void rt_mutex_adjust_prio(struct task_struct *task) | ||
132 | { | ||
133 | unsigned long flags; | ||
134 | |||
135 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
136 | __rt_mutex_adjust_prio(task); | ||
137 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Max number of times we'll walk the boosting chain: | ||
142 | */ | ||
143 | int max_lock_depth = 1024; | ||
144 | |||
145 | /* | ||
146 | * Adjust the priority chain. Also used for deadlock detection. | ||
147 | * Decreases task's usage by one - may thus free the task. | ||
148 | * | ||
149 | * @task: the task owning the mutex (owner) for which a chain walk is probably | ||
150 | * needed | ||
151 | * @deadlock_detect: do we have to carry out deadlock detection? | ||
152 | * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck | ||
153 | * things for a task that has just got its priority adjusted, and | ||
154 | * is waiting on a mutex) | ||
155 | * @orig_waiter: rt_mutex_waiter struct for the task that has just donated | ||
156 | * its priority to the mutex owner (can be NULL in the case | ||
157 | * depicted above or if the top waiter has gone away and we are | ||
158 | * actually deboosting the owner) | ||
159 | * @top_task: the current top waiter | ||
160 | * | ||
161 | * Returns 0 or -EDEADLK. | ||
162 | */ | ||
163 | static int rt_mutex_adjust_prio_chain(struct task_struct *task, | ||
164 | int deadlock_detect, | ||
165 | struct rt_mutex *orig_lock, | ||
166 | struct rt_mutex_waiter *orig_waiter, | ||
167 | struct task_struct *top_task) | ||
168 | { | ||
169 | struct rt_mutex *lock; | ||
170 | struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; | ||
171 | int detect_deadlock, ret = 0, depth = 0; | ||
172 | unsigned long flags; | ||
173 | |||
174 | detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter, | ||
175 | deadlock_detect); | ||
176 | |||
177 | /* | ||
178 | * The (de)boosting is a step by step approach with a lot of | ||
179 | * pitfalls. We want this to be preemptible and we want to hold a | ||
180 | * maximum of two locks per step. So we have to check | ||
181 | * carefully whether things change under us. | ||
182 | */ | ||
183 | again: | ||
184 | if (++depth > max_lock_depth) { | ||
185 | static int prev_max; | ||
186 | |||
187 | /* | ||
188 | * Print this only once. If the admin changes the limit, | ||
189 | * print a new message when reaching the limit again. | ||
190 | */ | ||
191 | if (prev_max != max_lock_depth) { | ||
192 | prev_max = max_lock_depth; | ||
193 | printk(KERN_WARNING "Maximum lock depth %d reached " | ||
194 | "task: %s (%d)\n", max_lock_depth, | ||
195 | top_task->comm, task_pid_nr(top_task)); | ||
196 | } | ||
197 | put_task_struct(task); | ||
198 | |||
199 | return deadlock_detect ? -EDEADLK : 0; | ||
200 | } | ||
201 | retry: | ||
202 | /* | ||
203 | * The task cannot go away, as we did a get_task_struct() on it before! | ||
204 | */ | ||
205 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
206 | |||
207 | waiter = task->pi_blocked_on; | ||
208 | /* | ||
209 | * Check whether the end of the boosting chain has been | ||
210 | * reached or the state of the chain has changed while we | ||
211 | * dropped the locks. | ||
212 | */ | ||
213 | if (!waiter) | ||
214 | goto out_unlock_pi; | ||
215 | |||
216 | /* | ||
217 | * Check the orig_waiter state. After we dropped the locks, | ||
218 | * the previous owner of the lock might have released the lock. | ||
219 | */ | ||
220 | if (orig_waiter && !rt_mutex_owner(orig_lock)) | ||
221 | goto out_unlock_pi; | ||
222 | |||
223 | /* | ||
224 | * Drop out when the task has no waiters. Note that | ||
225 | * top_waiter can be NULL when we are in the deboosting | ||
226 | * mode! | ||
227 | */ | ||
228 | if (top_waiter && (!task_has_pi_waiters(task) || | ||
229 | top_waiter != task_top_pi_waiter(task))) | ||
230 | goto out_unlock_pi; | ||
231 | |||
232 | /* | ||
233 | * When deadlock detection is off then we check, if further | ||
234 | * priority adjustment is necessary. | ||
235 | */ | ||
236 | if (!detect_deadlock && waiter->list_entry.prio == task->prio) | ||
237 | goto out_unlock_pi; | ||
238 | |||
239 | lock = waiter->lock; | ||
240 | if (!raw_spin_trylock(&lock->wait_lock)) { | ||
241 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
242 | cpu_relax(); | ||
243 | goto retry; | ||
244 | } | ||
245 | |||
246 | /* Deadlock detection */ | ||
247 | if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { | ||
248 | debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); | ||
249 | raw_spin_unlock(&lock->wait_lock); | ||
250 | ret = deadlock_detect ? -EDEADLK : 0; | ||
251 | goto out_unlock_pi; | ||
252 | } | ||
253 | |||
254 | top_waiter = rt_mutex_top_waiter(lock); | ||
255 | |||
256 | /* Requeue the waiter */ | ||
257 | plist_del(&waiter->list_entry, &lock->wait_list); | ||
258 | waiter->list_entry.prio = task->prio; | ||
259 | plist_add(&waiter->list_entry, &lock->wait_list); | ||
260 | |||
261 | /* Release the task */ | ||
262 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
263 | if (!rt_mutex_owner(lock)) { | ||
264 | /* | ||
265 | * If the requeue above changed the top waiter, then we need | ||
266 | * to wake the new top waiter up to try to get the lock. | ||
267 | */ | ||
268 | |||
269 | if (top_waiter != rt_mutex_top_waiter(lock)) | ||
270 | wake_up_process(rt_mutex_top_waiter(lock)->task); | ||
271 | raw_spin_unlock(&lock->wait_lock); | ||
272 | goto out_put_task; | ||
273 | } | ||
274 | put_task_struct(task); | ||
275 | |||
276 | /* Grab the next task */ | ||
277 | task = rt_mutex_owner(lock); | ||
278 | get_task_struct(task); | ||
279 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
280 | |||
281 | if (waiter == rt_mutex_top_waiter(lock)) { | ||
282 | /* Boost the owner */ | ||
283 | plist_del(&top_waiter->pi_list_entry, &task->pi_waiters); | ||
284 | waiter->pi_list_entry.prio = waiter->list_entry.prio; | ||
285 | plist_add(&waiter->pi_list_entry, &task->pi_waiters); | ||
286 | __rt_mutex_adjust_prio(task); | ||
287 | |||
288 | } else if (top_waiter == waiter) { | ||
289 | /* Deboost the owner */ | ||
290 | plist_del(&waiter->pi_list_entry, &task->pi_waiters); | ||
291 | waiter = rt_mutex_top_waiter(lock); | ||
292 | waiter->pi_list_entry.prio = waiter->list_entry.prio; | ||
293 | plist_add(&waiter->pi_list_entry, &task->pi_waiters); | ||
294 | __rt_mutex_adjust_prio(task); | ||
295 | } | ||
296 | |||
297 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
298 | |||
299 | top_waiter = rt_mutex_top_waiter(lock); | ||
300 | raw_spin_unlock(&lock->wait_lock); | ||
301 | |||
302 | if (!detect_deadlock && waiter != top_waiter) | ||
303 | goto out_put_task; | ||
304 | |||
305 | goto again; | ||
306 | |||
307 | out_unlock_pi: | ||
308 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
309 | out_put_task: | ||
310 | put_task_struct(task); | ||
311 | |||
312 | return ret; | ||
313 | } | ||
314 | |||
315 | /* | ||
316 | * Try to take an rt-mutex | ||
317 | * | ||
318 | * Must be called with lock->wait_lock held. | ||
319 | * | ||
320 | * @lock: the lock to be acquired. | ||
321 | * @task: the task which wants to acquire the lock | ||
322 | * @waiter: the waiter that is queued to the lock's wait list. (could be NULL) | ||
323 | */ | ||
324 | static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | ||
325 | struct rt_mutex_waiter *waiter) | ||
326 | { | ||
327 | /* | ||
328 | * We have to be careful here if the atomic speedups are | ||
329 | * enabled, such that, when | ||
330 | * - no other waiter is on the lock | ||
331 | * - the lock has been released since we did the cmpxchg | ||
332 | * the lock can be released or taken while we are doing the | ||
333 | * checks and marking the lock with RT_MUTEX_HAS_WAITERS. | ||
334 | * | ||
335 | * The atomic acquire/release aware variant of | ||
336 | * mark_rt_mutex_waiters uses a cmpxchg loop. After setting | ||
337 | * the WAITERS bit, the atomic release / acquire can not | ||
338 | * happen anymore and lock->wait_lock protects us from the | ||
339 | * non-atomic case. | ||
340 | * | ||
341 | * Note, that this might set lock->owner = | ||
342 | * RT_MUTEX_HAS_WAITERS in the case the lock is not contended | ||
343 | * any more. This is fixed up when we take the ownership. | ||
344 | * This is the transitional state explained at the top of this file. | ||
345 | */ | ||
346 | mark_rt_mutex_waiters(lock); | ||
347 | |||
348 | if (rt_mutex_owner(lock)) | ||
349 | return 0; | ||
350 | |||
351 | /* | ||
352 | * It will get the lock because of one of these conditions: | ||
353 | * 1) there is no waiter | ||
354 | * 2) higher priority than waiters | ||
355 | * 3) it is top waiter | ||
356 | */ | ||
357 | if (rt_mutex_has_waiters(lock)) { | ||
358 | if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) { | ||
359 | if (!waiter || waiter != rt_mutex_top_waiter(lock)) | ||
360 | return 0; | ||
361 | } | ||
362 | } | ||
363 | |||
364 | if (waiter || rt_mutex_has_waiters(lock)) { | ||
365 | unsigned long flags; | ||
366 | struct rt_mutex_waiter *top; | ||
367 | |||
368 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
369 | |||
370 | /* remove the queued waiter. */ | ||
371 | if (waiter) { | ||
372 | plist_del(&waiter->list_entry, &lock->wait_list); | ||
373 | task->pi_blocked_on = NULL; | ||
374 | } | ||
375 | |||
376 | /* | ||
377 | * We have to enqueue the top waiter (if it exists) into | ||
378 | * task->pi_waiters list. | ||
379 | */ | ||
380 | if (rt_mutex_has_waiters(lock)) { | ||
381 | top = rt_mutex_top_waiter(lock); | ||
382 | top->pi_list_entry.prio = top->list_entry.prio; | ||
383 | plist_add(&top->pi_list_entry, &task->pi_waiters); | ||
384 | } | ||
385 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
386 | } | ||
387 | |||
388 | /* We got the lock. */ | ||
389 | debug_rt_mutex_lock(lock); | ||
390 | |||
391 | rt_mutex_set_owner(lock, task); | ||
392 | |||
393 | rt_mutex_deadlock_account_lock(lock, task); | ||
394 | |||
395 | return 1; | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * Task blocks on lock. | ||
400 | * | ||
401 | * Prepare waiter and propagate pi chain | ||
402 | * | ||
403 | * This must be called with lock->wait_lock held. | ||
404 | */ | ||
405 | static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | ||
406 | struct rt_mutex_waiter *waiter, | ||
407 | struct task_struct *task, | ||
408 | int detect_deadlock) | ||
409 | { | ||
410 | struct task_struct *owner = rt_mutex_owner(lock); | ||
411 | struct rt_mutex_waiter *top_waiter = waiter; | ||
412 | unsigned long flags; | ||
413 | int chain_walk = 0, res; | ||
414 | |||
415 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
416 | __rt_mutex_adjust_prio(task); | ||
417 | waiter->task = task; | ||
418 | waiter->lock = lock; | ||
419 | plist_node_init(&waiter->list_entry, task->prio); | ||
420 | plist_node_init(&waiter->pi_list_entry, task->prio); | ||
421 | |||
422 | /* Get the top priority waiter on the lock */ | ||
423 | if (rt_mutex_has_waiters(lock)) | ||
424 | top_waiter = rt_mutex_top_waiter(lock); | ||
425 | plist_add(&waiter->list_entry, &lock->wait_list); | ||
426 | |||
427 | task->pi_blocked_on = waiter; | ||
428 | |||
429 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
430 | |||
431 | if (!owner) | ||
432 | return 0; | ||
433 | |||
434 | if (waiter == rt_mutex_top_waiter(lock)) { | ||
435 | raw_spin_lock_irqsave(&owner->pi_lock, flags); | ||
436 | plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); | ||
437 | plist_add(&waiter->pi_list_entry, &owner->pi_waiters); | ||
438 | |||
439 | __rt_mutex_adjust_prio(owner); | ||
440 | if (owner->pi_blocked_on) | ||
441 | chain_walk = 1; | ||
442 | raw_spin_unlock_irqrestore(&owner->pi_lock, flags); | ||
443 | } | ||
444 | else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) | ||
445 | chain_walk = 1; | ||
446 | |||
447 | if (!chain_walk) | ||
448 | return 0; | ||
449 | |||
450 | /* | ||
451 | * The owner can't disappear while holding a lock, | ||
452 | * so the owner struct is protected by wait_lock. | ||
453 | * Gets dropped in rt_mutex_adjust_prio_chain()! | ||
454 | */ | ||
455 | get_task_struct(owner); | ||
456 | |||
457 | raw_spin_unlock(&lock->wait_lock); | ||
458 | |||
459 | res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, | ||
460 | task); | ||
461 | |||
462 | raw_spin_lock(&lock->wait_lock); | ||
463 | |||
464 | return res; | ||
465 | } | ||
466 | |||
467 | /* | ||
468 | * Wake up the next waiter on the lock. | ||
469 | * | ||
470 | * Remove the top waiter from the current task's waiter list and wake it up. | ||
471 | * | ||
472 | * Called with lock->wait_lock held. | ||
473 | */ | ||
474 | static void wakeup_next_waiter(struct rt_mutex *lock) | ||
475 | { | ||
476 | struct rt_mutex_waiter *waiter; | ||
477 | unsigned long flags; | ||
478 | |||
479 | raw_spin_lock_irqsave(¤t->pi_lock, flags); | ||
480 | |||
481 | waiter = rt_mutex_top_waiter(lock); | ||
482 | |||
483 | /* | ||
484 | * Remove it from current->pi_waiters. We do not adjust a | ||
485 | * possible priority boost right now. We execute wakeup in the | ||
486 | * boosted mode and go back to normal after releasing | ||
487 | * lock->wait_lock. | ||
488 | */ | ||
489 | plist_del(&waiter->pi_list_entry, ¤t->pi_waiters); | ||
490 | |||
491 | rt_mutex_set_owner(lock, NULL); | ||
492 | |||
493 | raw_spin_unlock_irqrestore(¤t->pi_lock, flags); | ||
494 | |||
495 | wake_up_process(waiter->task); | ||
496 | } | ||
497 | |||
498 | /* | ||
499 | * Remove a waiter from a lock and give up | ||
500 | * | ||
501 | * Must be called with lock->wait_lock held, after the caller | ||
502 | * has just failed try_to_take_rt_mutex(). | ||
503 | */ | ||
504 | static void remove_waiter(struct rt_mutex *lock, | ||
505 | struct rt_mutex_waiter *waiter) | ||
506 | { | ||
507 | int first = (waiter == rt_mutex_top_waiter(lock)); | ||
508 | struct task_struct *owner = rt_mutex_owner(lock); | ||
509 | unsigned long flags; | ||
510 | int chain_walk = 0; | ||
511 | |||
512 | raw_spin_lock_irqsave(¤t->pi_lock, flags); | ||
513 | plist_del(&waiter->list_entry, &lock->wait_list); | ||
514 | current->pi_blocked_on = NULL; | ||
515 | raw_spin_unlock_irqrestore(¤t->pi_lock, flags); | ||
516 | |||
517 | if (!owner) | ||
518 | return; | ||
519 | |||
520 | if (first) { | ||
521 | |||
522 | raw_spin_lock_irqsave(&owner->pi_lock, flags); | ||
523 | |||
524 | plist_del(&waiter->pi_list_entry, &owner->pi_waiters); | ||
525 | |||
526 | if (rt_mutex_has_waiters(lock)) { | ||
527 | struct rt_mutex_waiter *next; | ||
528 | |||
529 | next = rt_mutex_top_waiter(lock); | ||
530 | plist_add(&next->pi_list_entry, &owner->pi_waiters); | ||
531 | } | ||
532 | __rt_mutex_adjust_prio(owner); | ||
533 | |||
534 | if (owner->pi_blocked_on) | ||
535 | chain_walk = 1; | ||
536 | |||
537 | raw_spin_unlock_irqrestore(&owner->pi_lock, flags); | ||
538 | } | ||
539 | |||
540 | WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); | ||
541 | |||
542 | if (!chain_walk) | ||
543 | return; | ||
544 | |||
545 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ | ||
546 | get_task_struct(owner); | ||
547 | |||
548 | raw_spin_unlock(&lock->wait_lock); | ||
549 | |||
550 | rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); | ||
551 | |||
552 | raw_spin_lock(&lock->wait_lock); | ||
553 | } | ||
554 | |||
555 | /* | ||
556 | * Recheck the pi chain, in case we got a priority setting | ||
557 | * | ||
558 | * Called from sched_setscheduler | ||
559 | */ | ||
560 | void rt_mutex_adjust_pi(struct task_struct *task) | ||
561 | { | ||
562 | struct rt_mutex_waiter *waiter; | ||
563 | unsigned long flags; | ||
564 | |||
565 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
566 | |||
567 | waiter = task->pi_blocked_on; | ||
568 | if (!waiter || waiter->list_entry.prio == task->prio) { | ||
569 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
570 | return; | ||
571 | } | ||
572 | |||
573 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
574 | |||
575 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ | ||
576 | get_task_struct(task); | ||
577 | rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); | ||
578 | } | ||
579 | |||
580 | /** | ||
581 | * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop | ||
582 | * @lock: the rt_mutex to take | ||
583 | * @state: the state the task should block in (TASK_INTERRUPTIBLE | ||
584 | * or TASK_UNINTERRUPTIBLE) | ||
585 | * @timeout: the pre-initialized and started timer, or NULL for none | ||
586 | * @waiter: the pre-initialized rt_mutex_waiter | ||
587 | * | ||
588 | * lock->wait_lock must be held by the caller. | ||
589 | */ | ||
590 | static int __sched | ||
591 | __rt_mutex_slowlock(struct rt_mutex *lock, int state, | ||
592 | struct hrtimer_sleeper *timeout, | ||
593 | struct rt_mutex_waiter *waiter) | ||
594 | { | ||
595 | int ret = 0; | ||
596 | |||
597 | for (;;) { | ||
598 | /* Try to acquire the lock: */ | ||
599 | if (try_to_take_rt_mutex(lock, current, waiter)) | ||
600 | break; | ||
601 | |||
602 | /* | ||
603 | * TASK_INTERRUPTIBLE checks for signals and | ||
604 | * timeout. Ignored otherwise. | ||
605 | */ | ||
606 | if (unlikely(state == TASK_INTERRUPTIBLE)) { | ||
607 | /* Signal pending? */ | ||
608 | if (signal_pending(current)) | ||
609 | ret = -EINTR; | ||
610 | if (timeout && !timeout->task) | ||
611 | ret = -ETIMEDOUT; | ||
612 | if (ret) | ||
613 | break; | ||
614 | } | ||
615 | |||
616 | raw_spin_unlock(&lock->wait_lock); | ||
617 | |||
618 | debug_rt_mutex_print_deadlock(waiter); | ||
619 | |||
620 | schedule_rt_mutex(lock); | ||
621 | |||
622 | raw_spin_lock(&lock->wait_lock); | ||
623 | set_current_state(state); | ||
624 | } | ||
625 | |||
626 | return ret; | ||
627 | } | ||
628 | |||
629 | /* | ||
630 | * Slow path lock function: | ||
631 | */ | ||
632 | static int __sched | ||
633 | rt_mutex_slowlock(struct rt_mutex *lock, int state, | ||
634 | struct hrtimer_sleeper *timeout, | ||
635 | int detect_deadlock) | ||
636 | { | ||
637 | struct rt_mutex_waiter waiter; | ||
638 | int ret = 0; | ||
639 | |||
640 | debug_rt_mutex_init_waiter(&waiter); | ||
641 | |||
642 | raw_spin_lock(&lock->wait_lock); | ||
643 | |||
644 | /* Try to acquire the lock again: */ | ||
645 | if (try_to_take_rt_mutex(lock, current, NULL)) { | ||
646 | raw_spin_unlock(&lock->wait_lock); | ||
647 | return 0; | ||
648 | } | ||
649 | |||
650 | set_current_state(state); | ||
651 | |||
652 | /* Setup the timer, when timeout != NULL */ | ||
653 | if (unlikely(timeout)) { | ||
654 | hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); | ||
655 | if (!hrtimer_active(&timeout->timer)) | ||
656 | timeout->task = NULL; | ||
657 | } | ||
658 | |||
659 | ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock); | ||
660 | |||
661 | if (likely(!ret)) | ||
662 | ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); | ||
663 | |||
664 | set_current_state(TASK_RUNNING); | ||
665 | |||
666 | if (unlikely(ret)) | ||
667 | remove_waiter(lock, &waiter); | ||
668 | |||
669 | /* | ||
670 | * try_to_take_rt_mutex() sets the waiter bit | ||
671 | * unconditionally. We might have to fix that up. | ||
672 | */ | ||
673 | fixup_rt_mutex_waiters(lock); | ||
674 | |||
675 | raw_spin_unlock(&lock->wait_lock); | ||
676 | |||
677 | /* Remove pending timer: */ | ||
678 | if (unlikely(timeout)) | ||
679 | hrtimer_cancel(&timeout->timer); | ||
680 | |||
681 | debug_rt_mutex_free_waiter(&waiter); | ||
682 | |||
683 | return ret; | ||
684 | } | ||
685 | |||
686 | /* | ||
687 | * Slow path try-lock function: | ||
688 | */ | ||
689 | static inline int | ||
690 | rt_mutex_slowtrylock(struct rt_mutex *lock) | ||
691 | { | ||
692 | int ret = 0; | ||
693 | |||
694 | raw_spin_lock(&lock->wait_lock); | ||
695 | |||
696 | if (likely(rt_mutex_owner(lock) != current)) { | ||
697 | |||
698 | ret = try_to_take_rt_mutex(lock, current, NULL); | ||
699 | /* | ||
700 | * try_to_take_rt_mutex() sets the lock waiters | ||
701 | * bit unconditionally. Clean this up. | ||
702 | */ | ||
703 | fixup_rt_mutex_waiters(lock); | ||
704 | } | ||
705 | |||
706 | raw_spin_unlock(&lock->wait_lock); | ||
707 | |||
708 | return ret; | ||
709 | } | ||
710 | |||
711 | /* | ||
712 | * Slow path to release a rt-mutex: | ||
713 | */ | ||
714 | static void __sched | ||
715 | rt_mutex_slowunlock(struct rt_mutex *lock) | ||
716 | { | ||
717 | raw_spin_lock(&lock->wait_lock); | ||
718 | |||
719 | debug_rt_mutex_unlock(lock); | ||
720 | |||
721 | rt_mutex_deadlock_account_unlock(current); | ||
722 | |||
723 | if (!rt_mutex_has_waiters(lock)) { | ||
724 | lock->owner = NULL; | ||
725 | raw_spin_unlock(&lock->wait_lock); | ||
726 | return; | ||
727 | } | ||
728 | |||
729 | wakeup_next_waiter(lock); | ||
730 | |||
731 | raw_spin_unlock(&lock->wait_lock); | ||
732 | |||
733 | /* Undo pi boosting if necessary: */ | ||
734 | rt_mutex_adjust_prio(current); | ||
735 | } | ||
736 | |||
737 | /* | ||
738 | * Debug-aware fast / slowpath lock, trylock and unlock. | ||
739 | * | ||
740 | * The atomic acquire/release ops are compiled away when either the | ||
741 | * architecture does not support cmpxchg or debugging is enabled. | ||
742 | */ | ||
743 | static inline int | ||
744 | rt_mutex_fastlock(struct rt_mutex *lock, int state, | ||
745 | int detect_deadlock, | ||
746 | int (*slowfn)(struct rt_mutex *lock, int state, | ||
747 | struct hrtimer_sleeper *timeout, | ||
748 | int detect_deadlock)) | ||
749 | { | ||
750 | if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { | ||
751 | rt_mutex_deadlock_account_lock(lock, current); | ||
752 | return 0; | ||
753 | } else | ||
754 | return slowfn(lock, state, NULL, detect_deadlock); | ||
755 | } | ||
756 | |||
757 | static inline int | ||
758 | rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, | ||
759 | struct hrtimer_sleeper *timeout, int detect_deadlock, | ||
760 | int (*slowfn)(struct rt_mutex *lock, int state, | ||
761 | struct hrtimer_sleeper *timeout, | ||
762 | int detect_deadlock)) | ||
763 | { | ||
764 | if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { | ||
765 | rt_mutex_deadlock_account_lock(lock, current); | ||
766 | return 0; | ||
767 | } else | ||
768 | return slowfn(lock, state, timeout, detect_deadlock); | ||
769 | } | ||
770 | |||
771 | static inline int | ||
772 | rt_mutex_fasttrylock(struct rt_mutex *lock, | ||
773 | int (*slowfn)(struct rt_mutex *lock)) | ||
774 | { | ||
775 | if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { | ||
776 | rt_mutex_deadlock_account_lock(lock, current); | ||
777 | return 1; | ||
778 | } | ||
779 | return slowfn(lock); | ||
780 | } | ||
781 | |||
782 | static inline void | ||
783 | rt_mutex_fastunlock(struct rt_mutex *lock, | ||
784 | void (*slowfn)(struct rt_mutex *lock)) | ||
785 | { | ||
786 | if (likely(rt_mutex_cmpxchg(lock, current, NULL))) | ||
787 | rt_mutex_deadlock_account_unlock(current); | ||
788 | else | ||
789 | slowfn(lock); | ||
790 | } | ||
791 | |||
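The four inline helpers above implement the fast/slow split described in the comment: the uncontended case is a single cmpxchg on lock->owner (NULL <-> current), and everything else falls through to the slow functions that take wait_lock. A minimal sketch of the same pattern for a toy lock, using a GCC builtin instead of rt_mutex_cmpxchg() (an illustration only, not kernel code; names are made up):

/* Toy fastpath/slowpath split, illustration only. */
struct toy_lock { void *owner; };               /* NULL when unowned */

static int toy_lock_acquire(struct toy_lock *l, void *self,
                            int (*slowfn)(struct toy_lock *))
{
        /* fastpath: free -> owned by us in one atomic step */
        if (__sync_bool_compare_and_swap(&l->owner, NULL, self))
                return 0;
        return slowfn(l);       /* contended: enqueue, boost, sleep */
}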
792 | /** | ||
793 | * rt_mutex_lock - lock a rt_mutex | ||
794 | * | ||
795 | * @lock: the rt_mutex to be locked | ||
796 | */ | ||
797 | void __sched rt_mutex_lock(struct rt_mutex *lock) | ||
798 | { | ||
799 | might_sleep(); | ||
800 | |||
801 | rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock); | ||
802 | } | ||
803 | EXPORT_SYMBOL_GPL(rt_mutex_lock); | ||
804 | |||
805 | /** | ||
806 | * rt_mutex_lock_interruptible - lock a rt_mutex interruptibly | ||
807 | * | ||
808 | * @lock: the rt_mutex to be locked | ||
809 | * @detect_deadlock: deadlock detection on/off | ||
810 | * | ||
811 | * Returns: | ||
812 | * 0 on success | ||
813 | * -EINTR when interrupted by a signal | ||
814 | * -EDEADLK when the lock would deadlock (when deadlock detection is on) | ||
815 | */ | ||
816 | int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, | ||
817 | int detect_deadlock) | ||
818 | { | ||
819 | might_sleep(); | ||
820 | |||
821 | return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, | ||
822 | detect_deadlock, rt_mutex_slowlock); | ||
823 | } | ||
824 | EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); | ||
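A hedged usage sketch of the interruptible variant, showing only the caller-side error handling documented above (the function name example_take() is made up for illustration):

static int example_take(struct rt_mutex *lock)
{
        /* 0 = no deadlock detection; see the kerneldoc above */
        int ret = rt_mutex_lock_interruptible(lock, 0);

        if (ret)                /* -EINTR, or -EDEADLK with detection on */
                return ret;
        /* ... critical section ... */
        rt_mutex_unlock(lock);
        return 0;
}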
825 | |||
826 | /** | ||
827 | * rt_mutex_timed_lock - lock a rt_mutex interruptibly, with a timeout; | ||
828 | * the timeout structure is provided | ||
829 | * by the caller | ||
830 | * | ||
831 | * @lock: the rt_mutex to be locked | ||
832 | * @timeout: timeout structure or NULL (no timeout) | ||
833 | * @detect_deadlock: deadlock detection on/off | ||
834 | * | ||
835 | * Returns: | ||
836 | * 0 on success | ||
837 | * -EINTR when interrupted by a signal | ||
838 | * -ETIMEDOUT when the timeout expired | ||
839 | * -EDEADLK when the lock would deadlock (when deadlock detection is on) | ||
840 | */ | ||
841 | int | ||
842 | rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, | ||
843 | int detect_deadlock) | ||
844 | { | ||
845 | might_sleep(); | ||
846 | |||
847 | return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | ||
848 | detect_deadlock, rt_mutex_slowlock); | ||
849 | } | ||
850 | EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); | ||
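The timed variant adds -ETIMEDOUT to the possible return values. A sketch of how a caller might dispatch on them; per the kerneldoc above, the hrtimer_sleeper must already be initialized and started by the caller, which is omitted here (names are illustrative):

static int example_timed_take(struct rt_mutex *lock,
                              struct hrtimer_sleeper *to)
{
        int ret = rt_mutex_timed_lock(lock, to, 0);

        switch (ret) {
        case 0:                         /* acquired */
                rt_mutex_unlock(lock);
                break;
        case -EINTR:                    /* signal while sleeping */
        case -ETIMEDOUT:                /* the caller's timer expired */
        case -EDEADLK:                  /* only with deadlock detection */
                break;
        }
        return ret;
}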
851 | |||
852 | /** | ||
853 | * rt_mutex_trylock - try to lock a rt_mutex | ||
854 | * | ||
855 | * @lock: the rt_mutex to be locked | ||
856 | * | ||
857 | * Returns 1 on success and 0 on contention | ||
858 | */ | ||
859 | int __sched rt_mutex_trylock(struct rt_mutex *lock) | ||
860 | { | ||
861 | return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); | ||
862 | } | ||
863 | EXPORT_SYMBOL_GPL(rt_mutex_trylock); | ||
864 | |||
865 | /** | ||
866 | * rt_mutex_unlock - unlock a rt_mutex | ||
867 | * | ||
868 | * @lock: the rt_mutex to be unlocked | ||
869 | */ | ||
870 | void __sched rt_mutex_unlock(struct rt_mutex *lock) | ||
871 | { | ||
872 | rt_mutex_fastunlock(lock, rt_mutex_slowunlock); | ||
873 | } | ||
874 | EXPORT_SYMBOL_GPL(rt_mutex_unlock); | ||
875 | |||
876 | /** | ||
877 | * rt_mutex_destroy - mark a mutex unusable | ||
878 | * @lock: the mutex to be destroyed | ||
879 | * | ||
880 | * This function marks the mutex uninitialized, and any subsequent | ||
881 | * use of the mutex is forbidden. The mutex must not be locked when | ||
882 | * this function is called. | ||
883 | */ | ||
884 | void rt_mutex_destroy(struct rt_mutex *lock) | ||
885 | { | ||
886 | WARN_ON(rt_mutex_is_locked(lock)); | ||
887 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
888 | lock->magic = NULL; | ||
889 | #endif | ||
890 | } | ||
891 | |||
892 | EXPORT_SYMBOL_GPL(rt_mutex_destroy); | ||
893 | |||
894 | /** | ||
895 | * __rt_mutex_init - initialize the rt lock | ||
896 | * | ||
897 | * @lock: the rt lock to be initialized | ||
898 | * | ||
899 | * Initialize the rt lock to unlocked state. | ||
900 | * | ||
901 | * Initializing a locked rt lock is not allowed. | ||
902 | */ | ||
903 | void __rt_mutex_init(struct rt_mutex *lock, const char *name) | ||
904 | { | ||
905 | lock->owner = NULL; | ||
906 | raw_spin_lock_init(&lock->wait_lock); | ||
907 | plist_head_init(&lock->wait_list); | ||
908 | |||
909 | debug_rt_mutex_init(lock, name); | ||
910 | } | ||
911 | EXPORT_SYMBOL_GPL(__rt_mutex_init); | ||
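A short sketch of run-time initialization. Callers would normally go through the rt_mutex_init()/DEFINE_RT_MUTEX() helpers in <linux/rtmutex.h> rather than call __rt_mutex_init() directly; the variable and function names below are examples:

static struct rt_mutex example_lock;

static void example_setup(void)
{
        /* the name is only consumed by the debug code */
        __rt_mutex_init(&example_lock, "example_lock");
}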
912 | |||
913 | /** | ||
914 | * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a | ||
915 | * proxy owner | ||
916 | * | ||
917 | * @lock: the rt_mutex to be locked | ||
918 | * @proxy_owner: the task to set as owner | ||
919 | * | ||
920 | * No locking. Caller has to do serializing itself | ||
921 | * Special API call for PI-futex support | ||
922 | */ | ||
923 | void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | ||
924 | struct task_struct *proxy_owner) | ||
925 | { | ||
926 | __rt_mutex_init(lock, NULL); | ||
927 | debug_rt_mutex_proxy_lock(lock, proxy_owner); | ||
928 | rt_mutex_set_owner(lock, proxy_owner); | ||
929 | rt_mutex_deadlock_account_lock(lock, proxy_owner); | ||
930 | } | ||
931 | |||
932 | /** | ||
933 | * rt_mutex_proxy_unlock - release a lock on behalf of owner | ||
934 | * | ||
935 | * @lock: the rt_mutex to be locked | ||
936 | * | ||
937 | * No locking. Caller has to do serializing itself | ||
938 | * Special API call for PI-futex support | ||
939 | */ | ||
940 | void rt_mutex_proxy_unlock(struct rt_mutex *lock, | ||
941 | struct task_struct *proxy_owner) | ||
942 | { | ||
943 | debug_rt_mutex_proxy_unlock(lock); | ||
944 | rt_mutex_set_owner(lock, NULL); | ||
945 | rt_mutex_deadlock_account_unlock(proxy_owner); | ||
946 | } | ||
947 | |||
948 | /** | ||
949 | * rt_mutex_start_proxy_lock() - Start lock acquisition for another task | ||
950 | * @lock: the rt_mutex to take | ||
951 | * @waiter: the pre-initialized rt_mutex_waiter | ||
952 | * @task: the task to prepare | ||
953 | * @detect_deadlock: perform deadlock detection (1) or not (0) | ||
954 | * | ||
955 | * Returns: | ||
956 | * 0 - task blocked on lock | ||
957 | * 1 - acquired the lock for task, caller should wake it up | ||
958 | * <0 - error | ||
959 | * | ||
960 | * Special API call for FUTEX_REQUEUE_PI support. | ||
961 | */ | ||
962 | int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | ||
963 | struct rt_mutex_waiter *waiter, | ||
964 | struct task_struct *task, int detect_deadlock) | ||
965 | { | ||
966 | int ret; | ||
967 | |||
968 | raw_spin_lock(&lock->wait_lock); | ||
969 | |||
970 | if (try_to_take_rt_mutex(lock, task, NULL)) { | ||
971 | raw_spin_unlock(&lock->wait_lock); | ||
972 | return 1; | ||
973 | } | ||
974 | |||
975 | ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); | ||
976 | |||
977 | if (ret && !rt_mutex_owner(lock)) { | ||
978 | /* | ||
979 | * Reset the return value. We might have | ||
980 | * returned with -EDEADLK and the owner | ||
981 | * released the lock while we were walking the | ||
982 | * pi chain. Let the waiter sort it out. | ||
983 | */ | ||
984 | ret = 0; | ||
985 | } | ||
986 | |||
987 | if (unlikely(ret)) | ||
988 | remove_waiter(lock, waiter); | ||
989 | |||
990 | raw_spin_unlock(&lock->wait_lock); | ||
991 | |||
992 | debug_rt_mutex_print_deadlock(waiter); | ||
993 | |||
994 | return ret; | ||
995 | } | ||
996 | |||
997 | /** | ||
998 | * rt_mutex_next_owner - return the next owner of the lock | ||
999 | * | ||
1000 | * @lock: the rt lock to query | ||
1001 | * | ||
1002 | * Returns the next owner of the lock or NULL | ||
1003 | * | ||
1004 | * Caller has to serialize against other accessors to the lock | ||
1005 | * itself. | ||
1006 | * | ||
1007 | * Special API call for PI-futex support | ||
1008 | */ | ||
1009 | struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) | ||
1010 | { | ||
1011 | if (!rt_mutex_has_waiters(lock)) | ||
1012 | return NULL; | ||
1013 | |||
1014 | return rt_mutex_top_waiter(lock)->task; | ||
1015 | } | ||
1016 | |||
1017 | /** | ||
1018 | * rt_mutex_finish_proxy_lock() - Complete lock acquisition | ||
1019 | * @lock: the rt_mutex we were woken on | ||
1020 | * @to: the timeout, null if none. hrtimer should already have | ||
1021 | * been started. | ||
1022 | * @waiter: the pre-initialized rt_mutex_waiter | ||
1023 | * @detect_deadlock: perform deadlock detection (1) or not (0) | ||
1024 | * | ||
1025 | * Complete the lock acquisition started on our behalf by another thread. | ||
1026 | * | ||
1027 | * Returns: | ||
1028 | * 0 - success | ||
1029 | * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK | ||
1030 | * | ||
1031 | * Special API call for PI-futex requeue support | ||
1032 | */ | ||
1033 | int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | ||
1034 | struct hrtimer_sleeper *to, | ||
1035 | struct rt_mutex_waiter *waiter, | ||
1036 | int detect_deadlock) | ||
1037 | { | ||
1038 | int ret; | ||
1039 | |||
1040 | raw_spin_lock(&lock->wait_lock); | ||
1041 | |||
1042 | set_current_state(TASK_INTERRUPTIBLE); | ||
1043 | |||
1044 | ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); | ||
1045 | |||
1046 | set_current_state(TASK_RUNNING); | ||
1047 | |||
1048 | if (unlikely(ret)) | ||
1049 | remove_waiter(lock, waiter); | ||
1050 | |||
1051 | /* | ||
1052 | * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might | ||
1053 | * have to fix that up. | ||
1054 | */ | ||
1055 | fixup_rt_mutex_waiters(lock); | ||
1056 | |||
1057 | raw_spin_unlock(&lock->wait_lock); | ||
1058 | |||
1059 | return ret; | ||
1060 | } | ||
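Taken together, the proxy API is used by the futex requeue-PI code roughly as sketched below. This is a schematic simplification with made-up wrapper names; the real callers live in kernel/futex.c and handle many more corner cases:

/* Requeueing side: try to take the lock on behalf of 'task'. */
static int example_requeue_side(struct rt_mutex *lock,
                                struct rt_mutex_waiter *waiter,
                                struct task_struct *task)
{
        int ret = rt_mutex_start_proxy_lock(lock, waiter, task, 0);

        if (ret == 1)
                wake_up_process(task);  /* lock already taken for task */
        return ret;                     /* 0: task queued, <0: error */
}

/* In 'task', after it has been woken: finish the acquisition or back out. */
static int example_waiter_side(struct rt_mutex *lock,
                               struct rt_mutex_waiter *waiter,
                               struct hrtimer_sleeper *to)
{
        return rt_mutex_finish_proxy_lock(lock, to, waiter, 0);
}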
diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h new file mode 100644 index 000000000000..a1a1dd06421d --- /dev/null +++ b/kernel/locking/rtmutex.h | |||
@@ -0,0 +1,26 @@ | |||
1 | /* | ||
2 | * RT-Mutexes: blocking mutual exclusion locks with PI support | ||
3 | * | ||
4 | * started by Ingo Molnar and Thomas Gleixner: | ||
5 | * | ||
6 | * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
7 | * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | ||
8 | * | ||
9 | * This file contains macros used solely by rtmutex.c. | ||
10 | * Non-debug version. | ||
11 | */ | ||
12 | |||
13 | #define rt_mutex_deadlock_check(l) (0) | ||
14 | #define rt_mutex_deadlock_account_lock(m, t) do { } while (0) | ||
15 | #define rt_mutex_deadlock_account_unlock(l) do { } while (0) | ||
16 | #define debug_rt_mutex_init_waiter(w) do { } while (0) | ||
17 | #define debug_rt_mutex_free_waiter(w) do { } while (0) | ||
18 | #define debug_rt_mutex_lock(l) do { } while (0) | ||
19 | #define debug_rt_mutex_proxy_lock(l,p) do { } while (0) | ||
20 | #define debug_rt_mutex_proxy_unlock(l) do { } while (0) | ||
21 | #define debug_rt_mutex_unlock(l) do { } while (0) | ||
22 | #define debug_rt_mutex_init(m, n) do { } while (0) | ||
23 | #define debug_rt_mutex_deadlock(d, a ,l) do { } while (0) | ||
24 | #define debug_rt_mutex_print_deadlock(w) do { } while (0) | ||
25 | #define debug_rt_mutex_detect_deadlock(w,d) (d) | ||
26 | #define debug_rt_mutex_reset_waiter(w) do { } while (0) | ||
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h new file mode 100644 index 000000000000..53a66c85261b --- /dev/null +++ b/kernel/locking/rtmutex_common.h | |||
@@ -0,0 +1,126 @@ | |||
1 | /* | ||
2 | * RT Mutexes: blocking mutual exclusion locks with PI support | ||
3 | * | ||
4 | * started by Ingo Molnar and Thomas Gleixner: | ||
5 | * | ||
6 | * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
7 | * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | ||
8 | * | ||
9 | * This file contains the private data structure and API definitions. | ||
10 | */ | ||
11 | |||
12 | #ifndef __KERNEL_RTMUTEX_COMMON_H | ||
13 | #define __KERNEL_RTMUTEX_COMMON_H | ||
14 | |||
15 | #include <linux/rtmutex.h> | ||
16 | |||
17 | /* | ||
18 | * The rtmutex in kernel tester is independent of rtmutex debugging. We | ||
19 | * call schedule_rt_mutex_test() instead of schedule() for the tasks which | ||
20 | * belong to the tester. That way we can delay the wakeup path of those | ||
21 | * threads to provoke lock stealing and testing of complex boosting scenarios. | ||
22 | */ | ||
23 | #ifdef CONFIG_RT_MUTEX_TESTER | ||
24 | |||
25 | extern void schedule_rt_mutex_test(struct rt_mutex *lock); | ||
26 | |||
27 | #define schedule_rt_mutex(_lock) \ | ||
28 | do { \ | ||
29 | if (!(current->flags & PF_MUTEX_TESTER)) \ | ||
30 | schedule(); \ | ||
31 | else \ | ||
32 | schedule_rt_mutex_test(_lock); \ | ||
33 | } while (0) | ||
34 | |||
35 | #else | ||
36 | # define schedule_rt_mutex(_lock) schedule() | ||
37 | #endif | ||
38 | |||
39 | /* | ||
40 | * This is the control structure for tasks blocked on a rt_mutex, | ||
41 | * which is allocated on the kernel stack of the blocked task. | ||
42 | * | ||
43 | * @list_entry: pi node to enqueue into the mutex waiters list | ||
44 | * @pi_list_entry: pi node to enqueue into the mutex owner waiters list | ||
45 | * @task: task reference to the blocked task | ||
46 | */ | ||
47 | struct rt_mutex_waiter { | ||
48 | struct plist_node list_entry; | ||
49 | struct plist_node pi_list_entry; | ||
50 | struct task_struct *task; | ||
51 | struct rt_mutex *lock; | ||
52 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
53 | unsigned long ip; | ||
54 | struct pid *deadlock_task_pid; | ||
55 | struct rt_mutex *deadlock_lock; | ||
56 | #endif | ||
57 | }; | ||
58 | |||
59 | /* | ||
60 | * Various helpers to access the waiters-plist: | ||
61 | */ | ||
62 | static inline int rt_mutex_has_waiters(struct rt_mutex *lock) | ||
63 | { | ||
64 | return !plist_head_empty(&lock->wait_list); | ||
65 | } | ||
66 | |||
67 | static inline struct rt_mutex_waiter * | ||
68 | rt_mutex_top_waiter(struct rt_mutex *lock) | ||
69 | { | ||
70 | struct rt_mutex_waiter *w; | ||
71 | |||
72 | w = plist_first_entry(&lock->wait_list, struct rt_mutex_waiter, | ||
73 | list_entry); | ||
74 | BUG_ON(w->lock != lock); | ||
75 | |||
76 | return w; | ||
77 | } | ||
78 | |||
79 | static inline int task_has_pi_waiters(struct task_struct *p) | ||
80 | { | ||
81 | return !plist_head_empty(&p->pi_waiters); | ||
82 | } | ||
83 | |||
84 | static inline struct rt_mutex_waiter * | ||
85 | task_top_pi_waiter(struct task_struct *p) | ||
86 | { | ||
87 | return plist_first_entry(&p->pi_waiters, struct rt_mutex_waiter, | ||
88 | pi_list_entry); | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * lock->owner state tracking: | ||
93 | */ | ||
94 | #define RT_MUTEX_HAS_WAITERS 1UL | ||
95 | #define RT_MUTEX_OWNER_MASKALL 1UL | ||
96 | |||
97 | static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) | ||
98 | { | ||
99 | return (struct task_struct *) | ||
100 | ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); | ||
101 | } | ||
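Because RT_MUTEX_HAS_WAITERS lives in the low bit of lock->owner, the raw pointer is never used directly. A small illustration of the encoding; the helper below is an example, not part of this header:

/* Store an owner with the "has waiters" bit set (rtmutex.c only sets
 * the bit when waiters actually exist). rt_mutex_owner() above masks
 * the bit off again before the pointer is used. */
static inline void example_set_owner_with_waiters(struct rt_mutex *lock,
                                                  struct task_struct *owner)
{
        lock->owner = (struct task_struct *)
                ((unsigned long)owner | RT_MUTEX_HAS_WAITERS);
}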
102 | |||
103 | /* | ||
104 | * PI-futex support (proxy locking functions, etc.): | ||
105 | */ | ||
106 | extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); | ||
107 | extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | ||
108 | struct task_struct *proxy_owner); | ||
109 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, | ||
110 | struct task_struct *proxy_owner); | ||
111 | extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | ||
112 | struct rt_mutex_waiter *waiter, | ||
113 | struct task_struct *task, | ||
114 | int detect_deadlock); | ||
115 | extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | ||
116 | struct hrtimer_sleeper *to, | ||
117 | struct rt_mutex_waiter *waiter, | ||
118 | int detect_deadlock); | ||
119 | |||
120 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
121 | # include "rtmutex-debug.h" | ||
122 | #else | ||
123 | # include "rtmutex.h" | ||
124 | #endif | ||
125 | |||
126 | #endif | ||
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c new file mode 100644 index 000000000000..9be8a9144978 --- /dev/null +++ b/kernel/locking/rwsem-spinlock.c | |||
@@ -0,0 +1,296 @@ | |||
1 | /* rwsem-spinlock.c: R/W semaphores: contention handling functions for | ||
2 | * generic spinlock implementation | ||
3 | * | ||
4 | * Copyright (c) 2001 David Howells (dhowells@redhat.com). | ||
5 | * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> | ||
6 | * - Derived also from comments by Linus | ||
7 | */ | ||
8 | #include <linux/rwsem.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/export.h> | ||
11 | |||
12 | enum rwsem_waiter_type { | ||
13 | RWSEM_WAITING_FOR_WRITE, | ||
14 | RWSEM_WAITING_FOR_READ | ||
15 | }; | ||
16 | |||
17 | struct rwsem_waiter { | ||
18 | struct list_head list; | ||
19 | struct task_struct *task; | ||
20 | enum rwsem_waiter_type type; | ||
21 | }; | ||
22 | |||
23 | int rwsem_is_locked(struct rw_semaphore *sem) | ||
24 | { | ||
25 | int ret = 1; | ||
26 | unsigned long flags; | ||
27 | |||
28 | if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { | ||
29 | ret = (sem->activity != 0); | ||
30 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
31 | } | ||
32 | return ret; | ||
33 | } | ||
34 | EXPORT_SYMBOL(rwsem_is_locked); | ||
35 | |||
36 | /* | ||
37 | * initialise the semaphore | ||
38 | */ | ||
39 | void __init_rwsem(struct rw_semaphore *sem, const char *name, | ||
40 | struct lock_class_key *key) | ||
41 | { | ||
42 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
43 | /* | ||
44 | * Make sure we are not reinitializing a held semaphore: | ||
45 | */ | ||
46 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | ||
47 | lockdep_init_map(&sem->dep_map, name, key, 0); | ||
48 | #endif | ||
49 | sem->activity = 0; | ||
50 | raw_spin_lock_init(&sem->wait_lock); | ||
51 | INIT_LIST_HEAD(&sem->wait_list); | ||
52 | } | ||
53 | EXPORT_SYMBOL(__init_rwsem); | ||
54 | |||
55 | /* | ||
56 | * handle the lock release when processes blocked on it can now run | ||
57 | * - if we come here, then: | ||
58 | * - the 'active count' _reached_ zero | ||
59 | * - the 'waiting count' is non-zero | ||
60 | * - the spinlock must be held by the caller | ||
61 | * - woken process blocks are discarded from the list after having task zeroed | ||
62 | * - writers are only woken if wakewrite is non-zero | ||
63 | */ | ||
64 | static inline struct rw_semaphore * | ||
65 | __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) | ||
66 | { | ||
67 | struct rwsem_waiter *waiter; | ||
68 | struct task_struct *tsk; | ||
69 | int woken; | ||
70 | |||
71 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | ||
72 | |||
73 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) { | ||
74 | if (wakewrite) | ||
75 | /* Wake up a writer. Note that we do not grant it the | ||
76 | * lock - it will have to acquire it when it runs. */ | ||
77 | wake_up_process(waiter->task); | ||
78 | goto out; | ||
79 | } | ||
80 | |||
81 | /* grant an infinite number of read locks to the front of the queue */ | ||
82 | woken = 0; | ||
83 | do { | ||
84 | struct list_head *next = waiter->list.next; | ||
85 | |||
86 | list_del(&waiter->list); | ||
87 | tsk = waiter->task; | ||
88 | smp_mb(); | ||
89 | waiter->task = NULL; | ||
90 | wake_up_process(tsk); | ||
91 | put_task_struct(tsk); | ||
92 | woken++; | ||
93 | if (next == &sem->wait_list) | ||
94 | break; | ||
95 | waiter = list_entry(next, struct rwsem_waiter, list); | ||
96 | } while (waiter->type != RWSEM_WAITING_FOR_WRITE); | ||
97 | |||
98 | sem->activity += woken; | ||
99 | |||
100 | out: | ||
101 | return sem; | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * wake a single writer | ||
106 | */ | ||
107 | static inline struct rw_semaphore * | ||
108 | __rwsem_wake_one_writer(struct rw_semaphore *sem) | ||
109 | { | ||
110 | struct rwsem_waiter *waiter; | ||
111 | |||
112 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | ||
113 | wake_up_process(waiter->task); | ||
114 | |||
115 | return sem; | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * get a read lock on the semaphore | ||
120 | */ | ||
121 | void __sched __down_read(struct rw_semaphore *sem) | ||
122 | { | ||
123 | struct rwsem_waiter waiter; | ||
124 | struct task_struct *tsk; | ||
125 | unsigned long flags; | ||
126 | |||
127 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
128 | |||
129 | if (sem->activity >= 0 && list_empty(&sem->wait_list)) { | ||
130 | /* granted */ | ||
131 | sem->activity++; | ||
132 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
133 | goto out; | ||
134 | } | ||
135 | |||
136 | tsk = current; | ||
137 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
138 | |||
139 | /* set up my own style of waitqueue */ | ||
140 | waiter.task = tsk; | ||
141 | waiter.type = RWSEM_WAITING_FOR_READ; | ||
142 | get_task_struct(tsk); | ||
143 | |||
144 | list_add_tail(&waiter.list, &sem->wait_list); | ||
145 | |||
146 | /* we don't need to touch the semaphore struct anymore */ | ||
147 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
148 | |||
149 | /* wait to be given the lock */ | ||
150 | for (;;) { | ||
151 | if (!waiter.task) | ||
152 | break; | ||
153 | schedule(); | ||
154 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
155 | } | ||
156 | |||
157 | tsk->state = TASK_RUNNING; | ||
158 | out: | ||
159 | ; | ||
160 | } | ||
161 | |||
162 | /* | ||
163 | * trylock for reading -- returns 1 if successful, 0 if contention | ||
164 | */ | ||
165 | int __down_read_trylock(struct rw_semaphore *sem) | ||
166 | { | ||
167 | unsigned long flags; | ||
168 | int ret = 0; | ||
169 | |||
170 | |||
171 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
172 | |||
173 | if (sem->activity >= 0 && list_empty(&sem->wait_list)) { | ||
174 | /* granted */ | ||
175 | sem->activity++; | ||
176 | ret = 1; | ||
177 | } | ||
178 | |||
179 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
180 | |||
181 | return ret; | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * get a write lock on the semaphore | ||
186 | */ | ||
187 | void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) | ||
188 | { | ||
189 | struct rwsem_waiter waiter; | ||
190 | struct task_struct *tsk; | ||
191 | unsigned long flags; | ||
192 | |||
193 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
194 | |||
195 | /* set up my own style of waitqueue */ | ||
196 | tsk = current; | ||
197 | waiter.task = tsk; | ||
198 | waiter.type = RWSEM_WAITING_FOR_WRITE; | ||
199 | list_add_tail(&waiter.list, &sem->wait_list); | ||
200 | |||
201 | /* wait for someone to release the lock */ | ||
202 | for (;;) { | ||
203 | /* | ||
204 | * This is the key to supporting write lock stealing: it lets the | ||
205 | * task already on the CPU take the lock right away, rather than | ||
206 | * go to sleep and wait for the system to wake it (or someone | ||
207 | * else at the head of the wait list) up. | ||
208 | */ | ||
209 | if (sem->activity == 0) | ||
210 | break; | ||
211 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
212 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
213 | schedule(); | ||
214 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
215 | } | ||
216 | /* got the lock */ | ||
217 | sem->activity = -1; | ||
218 | list_del(&waiter.list); | ||
219 | |||
220 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
221 | } | ||
222 | |||
223 | void __sched __down_write(struct rw_semaphore *sem) | ||
224 | { | ||
225 | __down_write_nested(sem, 0); | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * trylock for writing -- returns 1 if successful, 0 if contention | ||
230 | */ | ||
231 | int __down_write_trylock(struct rw_semaphore *sem) | ||
232 | { | ||
233 | unsigned long flags; | ||
234 | int ret = 0; | ||
235 | |||
236 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
237 | |||
238 | if (sem->activity == 0) { | ||
239 | /* got the lock */ | ||
240 | sem->activity = -1; | ||
241 | ret = 1; | ||
242 | } | ||
243 | |||
244 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
245 | |||
246 | return ret; | ||
247 | } | ||
248 | |||
249 | /* | ||
250 | * release a read lock on the semaphore | ||
251 | */ | ||
252 | void __up_read(struct rw_semaphore *sem) | ||
253 | { | ||
254 | unsigned long flags; | ||
255 | |||
256 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
257 | |||
258 | if (--sem->activity == 0 && !list_empty(&sem->wait_list)) | ||
259 | sem = __rwsem_wake_one_writer(sem); | ||
260 | |||
261 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
262 | } | ||
263 | |||
264 | /* | ||
265 | * release a write lock on the semaphore | ||
266 | */ | ||
267 | void __up_write(struct rw_semaphore *sem) | ||
268 | { | ||
269 | unsigned long flags; | ||
270 | |||
271 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
272 | |||
273 | sem->activity = 0; | ||
274 | if (!list_empty(&sem->wait_list)) | ||
275 | sem = __rwsem_do_wake(sem, 1); | ||
276 | |||
277 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
278 | } | ||
279 | |||
280 | /* | ||
281 | * downgrade a write lock into a read lock | ||
282 | * - just wake up any readers at the front of the queue | ||
283 | */ | ||
284 | void __downgrade_write(struct rw_semaphore *sem) | ||
285 | { | ||
286 | unsigned long flags; | ||
287 | |||
288 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
289 | |||
290 | sem->activity = 1; | ||
291 | if (!list_empty(&sem->wait_list)) | ||
292 | sem = __rwsem_do_wake(sem, 0); | ||
293 | |||
294 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
295 | } | ||
296 | |||
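Throughout this spinlock-based variant, sem->activity carries the whole lock state: 0 means unlocked, a positive value counts active readers, and -1 marks a single active writer, always manipulated under wait_lock. Two illustrative helpers (not part of the kernel API) make the encoding explicit:

/* Illustration only; the real code inspects sem->activity under wait_lock. */
static inline int example_is_write_locked(struct rw_semaphore *sem)
{
        return sem->activity == -1;             /* one writer holds it */
}

static inline int example_active_readers(struct rw_semaphore *sem)
{
        return sem->activity > 0 ? sem->activity : 0;
}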
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c new file mode 100644 index 000000000000..19c5fa95e0b4 --- /dev/null +++ b/kernel/locking/rwsem-xadd.c | |||
@@ -0,0 +1,293 @@ | |||
1 | /* rwsem.c: R/W semaphores: contention handling functions | ||
2 | * | ||
3 | * Written by David Howells (dhowells@redhat.com). | ||
4 | * Derived from arch/i386/kernel/semaphore.c | ||
5 | * | ||
6 | * Writer lock-stealing by Alex Shi <alex.shi@intel.com> | ||
7 | * and Michel Lespinasse <walken@google.com> | ||
8 | */ | ||
9 | #include <linux/rwsem.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/export.h> | ||
13 | |||
14 | /* | ||
15 | * Initialize an rwsem: | ||
16 | */ | ||
17 | void __init_rwsem(struct rw_semaphore *sem, const char *name, | ||
18 | struct lock_class_key *key) | ||
19 | { | ||
20 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
21 | /* | ||
22 | * Make sure we are not reinitializing a held semaphore: | ||
23 | */ | ||
24 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | ||
25 | lockdep_init_map(&sem->dep_map, name, key, 0); | ||
26 | #endif | ||
27 | sem->count = RWSEM_UNLOCKED_VALUE; | ||
28 | raw_spin_lock_init(&sem->wait_lock); | ||
29 | INIT_LIST_HEAD(&sem->wait_list); | ||
30 | } | ||
31 | |||
32 | EXPORT_SYMBOL(__init_rwsem); | ||
33 | |||
34 | enum rwsem_waiter_type { | ||
35 | RWSEM_WAITING_FOR_WRITE, | ||
36 | RWSEM_WAITING_FOR_READ | ||
37 | }; | ||
38 | |||
39 | struct rwsem_waiter { | ||
40 | struct list_head list; | ||
41 | struct task_struct *task; | ||
42 | enum rwsem_waiter_type type; | ||
43 | }; | ||
44 | |||
45 | enum rwsem_wake_type { | ||
46 | RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ | ||
47 | RWSEM_WAKE_READERS, /* Wake readers only */ | ||
48 | RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ | ||
49 | }; | ||
50 | |||
51 | /* | ||
52 | * handle the lock release when processes blocked on it can now run | ||
53 | * - if we come here from up_xxxx(), then: | ||
54 | * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) | ||
55 | * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) | ||
56 | * - there must be someone on the queue | ||
57 | * - the spinlock must be held by the caller | ||
58 | * - woken process blocks are discarded from the list after having task zeroed | ||
59 | * - writers are only woken if downgrading is false | ||
60 | */ | ||
61 | static struct rw_semaphore * | ||
62 | __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) | ||
63 | { | ||
64 | struct rwsem_waiter *waiter; | ||
65 | struct task_struct *tsk; | ||
66 | struct list_head *next; | ||
67 | long oldcount, woken, loop, adjustment; | ||
68 | |||
69 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | ||
70 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) { | ||
71 | if (wake_type == RWSEM_WAKE_ANY) | ||
72 | /* Wake writer at the front of the queue, but do not | ||
73 | * grant it the lock yet as we want other writers | ||
74 | * to be able to steal it. Readers, on the other hand, | ||
75 | * will block as they will notice the queued writer. | ||
76 | */ | ||
77 | wake_up_process(waiter->task); | ||
78 | goto out; | ||
79 | } | ||
80 | |||
81 | /* Writers might steal the lock before we grant it to the next reader. | ||
82 | * We prefer to do the first reader grant before counting readers | ||
83 | * so we can bail out early if a writer stole the lock. | ||
84 | */ | ||
85 | adjustment = 0; | ||
86 | if (wake_type != RWSEM_WAKE_READ_OWNED) { | ||
87 | adjustment = RWSEM_ACTIVE_READ_BIAS; | ||
88 | try_reader_grant: | ||
89 | oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; | ||
90 | if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { | ||
91 | /* A writer stole the lock. Undo our reader grant. */ | ||
92 | if (rwsem_atomic_update(-adjustment, sem) & | ||
93 | RWSEM_ACTIVE_MASK) | ||
94 | goto out; | ||
95 | /* Last active locker left. Retry waking readers. */ | ||
96 | goto try_reader_grant; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | /* Grant an infinite number of read locks to the readers at the front | ||
101 | * of the queue. Note we increment the 'active part' of the count by | ||
102 | * the number of readers before waking any processes up. | ||
103 | */ | ||
104 | woken = 0; | ||
105 | do { | ||
106 | woken++; | ||
107 | |||
108 | if (waiter->list.next == &sem->wait_list) | ||
109 | break; | ||
110 | |||
111 | waiter = list_entry(waiter->list.next, | ||
112 | struct rwsem_waiter, list); | ||
113 | |||
114 | } while (waiter->type != RWSEM_WAITING_FOR_WRITE); | ||
115 | |||
116 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; | ||
117 | if (waiter->type != RWSEM_WAITING_FOR_WRITE) | ||
118 | /* hit end of list above */ | ||
119 | adjustment -= RWSEM_WAITING_BIAS; | ||
120 | |||
121 | if (adjustment) | ||
122 | rwsem_atomic_add(adjustment, sem); | ||
123 | |||
124 | next = sem->wait_list.next; | ||
125 | loop = woken; | ||
126 | do { | ||
127 | waiter = list_entry(next, struct rwsem_waiter, list); | ||
128 | next = waiter->list.next; | ||
129 | tsk = waiter->task; | ||
130 | smp_mb(); | ||
131 | waiter->task = NULL; | ||
132 | wake_up_process(tsk); | ||
133 | put_task_struct(tsk); | ||
134 | } while (--loop); | ||
135 | |||
136 | sem->wait_list.next = next; | ||
137 | next->prev = &sem->wait_list; | ||
138 | |||
139 | out: | ||
140 | return sem; | ||
141 | } | ||
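The bias arithmetic in __rwsem_do_wake() and the slow paths below depends on the count layout defined by the architecture rwsem headers. As a point of reference, the asm-generic 32-bit values look roughly like the following; 64-bit builds widen the active mask, so treat these as an assumption for illustration rather than a quote:

/* Assumed layout, paraphrased from include/asm-generic/rwsem.h (32-bit) */
#define RWSEM_UNLOCKED_VALUE        0x00000000
#define RWSEM_ACTIVE_BIAS           0x00000001
#define RWSEM_ACTIVE_MASK           0x0000ffff          /* "active part"  */
#define RWSEM_WAITING_BIAS          (-0x00010000)       /* "waiting part" */
#define RWSEM_ACTIVE_READ_BIAS      RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS     (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)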
142 | |||
143 | /* | ||
144 | * wait for the read lock to be granted | ||
145 | */ | ||
146 | struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | ||
147 | { | ||
148 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; | ||
149 | struct rwsem_waiter waiter; | ||
150 | struct task_struct *tsk = current; | ||
151 | |||
152 | /* set up my own style of waitqueue */ | ||
153 | waiter.task = tsk; | ||
154 | waiter.type = RWSEM_WAITING_FOR_READ; | ||
155 | get_task_struct(tsk); | ||
156 | |||
157 | raw_spin_lock_irq(&sem->wait_lock); | ||
158 | if (list_empty(&sem->wait_list)) | ||
159 | adjustment += RWSEM_WAITING_BIAS; | ||
160 | list_add_tail(&waiter.list, &sem->wait_list); | ||
161 | |||
162 | /* we're now waiting on the lock, but no longer actively locking */ | ||
163 | count = rwsem_atomic_update(adjustment, sem); | ||
164 | |||
165 | /* If there are no active locks, wake the front queued process(es). | ||
166 | * | ||
167 | * If there are no writers and we are first in the queue, | ||
168 | * wake our own waiter to join the existing active readers ! | ||
169 | */ | ||
170 | if (count == RWSEM_WAITING_BIAS || | ||
171 | (count > RWSEM_WAITING_BIAS && | ||
172 | adjustment != -RWSEM_ACTIVE_READ_BIAS)) | ||
173 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); | ||
174 | |||
175 | raw_spin_unlock_irq(&sem->wait_lock); | ||
176 | |||
177 | /* wait to be given the lock */ | ||
178 | while (true) { | ||
179 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
180 | if (!waiter.task) | ||
181 | break; | ||
182 | schedule(); | ||
183 | } | ||
184 | |||
185 | tsk->state = TASK_RUNNING; | ||
186 | |||
187 | return sem; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * wait until we successfully acquire the write lock | ||
192 | */ | ||
193 | struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) | ||
194 | { | ||
195 | long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; | ||
196 | struct rwsem_waiter waiter; | ||
197 | struct task_struct *tsk = current; | ||
198 | |||
199 | /* set up my own style of waitqueue */ | ||
200 | waiter.task = tsk; | ||
201 | waiter.type = RWSEM_WAITING_FOR_WRITE; | ||
202 | |||
203 | raw_spin_lock_irq(&sem->wait_lock); | ||
204 | if (list_empty(&sem->wait_list)) | ||
205 | adjustment += RWSEM_WAITING_BIAS; | ||
206 | list_add_tail(&waiter.list, &sem->wait_list); | ||
207 | |||
208 | /* we're now waiting on the lock, but no longer actively locking */ | ||
209 | count = rwsem_atomic_update(adjustment, sem); | ||
210 | |||
211 | /* If there were already threads queued before us and there are no | ||
212 | * active writers, the lock must be read owned; so we try to wake | ||
213 | * any read locks that were queued ahead of us. */ | ||
214 | if (count > RWSEM_WAITING_BIAS && | ||
215 | adjustment == -RWSEM_ACTIVE_WRITE_BIAS) | ||
216 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); | ||
217 | |||
218 | /* wait until we successfully acquire the lock */ | ||
219 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
220 | while (true) { | ||
221 | if (!(count & RWSEM_ACTIVE_MASK)) { | ||
222 | /* Try acquiring the write lock. */ | ||
223 | count = RWSEM_ACTIVE_WRITE_BIAS; | ||
224 | if (!list_is_singular(&sem->wait_list)) | ||
225 | count += RWSEM_WAITING_BIAS; | ||
226 | |||
227 | if (sem->count == RWSEM_WAITING_BIAS && | ||
228 | cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == | ||
229 | RWSEM_WAITING_BIAS) | ||
230 | break; | ||
231 | } | ||
232 | |||
233 | raw_spin_unlock_irq(&sem->wait_lock); | ||
234 | |||
235 | /* Block until there are no active lockers. */ | ||
236 | do { | ||
237 | schedule(); | ||
238 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
239 | } while ((count = sem->count) & RWSEM_ACTIVE_MASK); | ||
240 | |||
241 | raw_spin_lock_irq(&sem->wait_lock); | ||
242 | } | ||
243 | |||
244 | list_del(&waiter.list); | ||
245 | raw_spin_unlock_irq(&sem->wait_lock); | ||
246 | tsk->state = TASK_RUNNING; | ||
247 | |||
248 | return sem; | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * handle waking up a waiter on the semaphore | ||
253 | * - up_read/up_write has decremented the active part of count if we come here | ||
254 | */ | ||
255 | struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) | ||
256 | { | ||
257 | unsigned long flags; | ||
258 | |||
259 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
260 | |||
261 | /* do nothing if list empty */ | ||
262 | if (!list_empty(&sem->wait_list)) | ||
263 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); | ||
264 | |||
265 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
266 | |||
267 | return sem; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * downgrade a write lock into a read lock | ||
272 | * - caller incremented waiting part of count and discovered it still negative | ||
273 | * - just wake up any readers at the front of the queue | ||
274 | */ | ||
275 | struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) | ||
276 | { | ||
277 | unsigned long flags; | ||
278 | |||
279 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
280 | |||
281 | /* do nothing if list empty */ | ||
282 | if (!list_empty(&sem->wait_list)) | ||
283 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); | ||
284 | |||
285 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
286 | |||
287 | return sem; | ||
288 | } | ||
289 | |||
290 | EXPORT_SYMBOL(rwsem_down_read_failed); | ||
291 | EXPORT_SYMBOL(rwsem_down_write_failed); | ||
292 | EXPORT_SYMBOL(rwsem_wake); | ||
293 | EXPORT_SYMBOL(rwsem_downgrade_wake); | ||
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c new file mode 100644 index 000000000000..cfff1435bdfb --- /dev/null +++ b/kernel/locking/rwsem.c | |||
@@ -0,0 +1,157 @@ | |||
1 | /* kernel/rwsem.c: R/W semaphores, public implementation | ||
2 | * | ||
3 | * Written by David Howells (dhowells@redhat.com). | ||
4 | * Derived from asm-i386/semaphore.h | ||
5 | */ | ||
6 | |||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/export.h> | ||
11 | #include <linux/rwsem.h> | ||
12 | |||
13 | #include <linux/atomic.h> | ||
14 | |||
15 | /* | ||
16 | * lock for reading | ||
17 | */ | ||
18 | void __sched down_read(struct rw_semaphore *sem) | ||
19 | { | ||
20 | might_sleep(); | ||
21 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | ||
22 | |||
23 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); | ||
24 | } | ||
25 | |||
26 | EXPORT_SYMBOL(down_read); | ||
27 | |||
28 | /* | ||
29 | * trylock for reading -- returns 1 if successful, 0 if contention | ||
30 | */ | ||
31 | int down_read_trylock(struct rw_semaphore *sem) | ||
32 | { | ||
33 | int ret = __down_read_trylock(sem); | ||
34 | |||
35 | if (ret == 1) | ||
36 | rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); | ||
37 | return ret; | ||
38 | } | ||
39 | |||
40 | EXPORT_SYMBOL(down_read_trylock); | ||
41 | |||
42 | /* | ||
43 | * lock for writing | ||
44 | */ | ||
45 | void __sched down_write(struct rw_semaphore *sem) | ||
46 | { | ||
47 | might_sleep(); | ||
48 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | ||
49 | |||
50 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | ||
51 | } | ||
52 | |||
53 | EXPORT_SYMBOL(down_write); | ||
54 | |||
55 | /* | ||
56 | * trylock for writing -- returns 1 if successful, 0 if contention | ||
57 | */ | ||
58 | int down_write_trylock(struct rw_semaphore *sem) | ||
59 | { | ||
60 | int ret = __down_write_trylock(sem); | ||
61 | |||
62 | if (ret == 1) | ||
63 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); | ||
64 | return ret; | ||
65 | } | ||
66 | |||
67 | EXPORT_SYMBOL(down_write_trylock); | ||
68 | |||
69 | /* | ||
70 | * release a read lock | ||
71 | */ | ||
72 | void up_read(struct rw_semaphore *sem) | ||
73 | { | ||
74 | rwsem_release(&sem->dep_map, 1, _RET_IP_); | ||
75 | |||
76 | __up_read(sem); | ||
77 | } | ||
78 | |||
79 | EXPORT_SYMBOL(up_read); | ||
80 | |||
81 | /* | ||
82 | * release a write lock | ||
83 | */ | ||
84 | void up_write(struct rw_semaphore *sem) | ||
85 | { | ||
86 | rwsem_release(&sem->dep_map, 1, _RET_IP_); | ||
87 | |||
88 | __up_write(sem); | ||
89 | } | ||
90 | |||
91 | EXPORT_SYMBOL(up_write); | ||
92 | |||
93 | /* | ||
94 | * downgrade write lock to read lock | ||
95 | */ | ||
96 | void downgrade_write(struct rw_semaphore *sem) | ||
97 | { | ||
98 | /* | ||
99 | * lockdep: a downgraded write will live on as a write | ||
100 | * dependency. | ||
101 | */ | ||
102 | __downgrade_write(sem); | ||
103 | } | ||
104 | |||
105 | EXPORT_SYMBOL(downgrade_write); | ||
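A hedged usage sketch of the API exported above, including the downgrade path; the function and variable names are examples:

static DECLARE_RWSEM(example_sem);

static void example_reader(void)
{
        down_read(&example_sem);
        /* ... shared section ... */
        up_read(&example_sem);
}

static void example_writer_then_reader(void)
{
        down_write(&example_sem);
        /* ... exclusive section ... */
        downgrade_write(&example_sem);  /* keep a read lock, admit readers */
        /* ... shared section ... */
        up_read(&example_sem);
}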
106 | |||
107 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
108 | |||
109 | void down_read_nested(struct rw_semaphore *sem, int subclass) | ||
110 | { | ||
111 | might_sleep(); | ||
112 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | ||
113 | |||
114 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); | ||
115 | } | ||
116 | |||
117 | EXPORT_SYMBOL(down_read_nested); | ||
118 | |||
119 | void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) | ||
120 | { | ||
121 | might_sleep(); | ||
122 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); | ||
123 | |||
124 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | ||
125 | } | ||
126 | |||
127 | EXPORT_SYMBOL(_down_write_nest_lock); | ||
128 | |||
129 | void down_read_non_owner(struct rw_semaphore *sem) | ||
130 | { | ||
131 | might_sleep(); | ||
132 | |||
133 | __down_read(sem); | ||
134 | } | ||
135 | |||
136 | EXPORT_SYMBOL(down_read_non_owner); | ||
137 | |||
138 | void down_write_nested(struct rw_semaphore *sem, int subclass) | ||
139 | { | ||
140 | might_sleep(); | ||
141 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | ||
142 | |||
143 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | ||
144 | } | ||
145 | |||
146 | EXPORT_SYMBOL(down_write_nested); | ||
147 | |||
148 | void up_read_non_owner(struct rw_semaphore *sem) | ||
149 | { | ||
150 | __up_read(sem); | ||
151 | } | ||
152 | |||
153 | EXPORT_SYMBOL(up_read_non_owner); | ||
154 | |||
155 | #endif | ||
156 | |||
157 | |||
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c new file mode 100644 index 000000000000..6815171a4fff --- /dev/null +++ b/kernel/locking/semaphore.c | |||
@@ -0,0 +1,263 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2008 Intel Corporation | ||
3 | * Author: Matthew Wilcox <willy@linux.intel.com> | ||
4 | * | ||
5 | * Distributed under the terms of the GNU GPL, version 2 | ||
6 | * | ||
7 | * This file implements counting semaphores. | ||
8 | * A counting semaphore may be acquired 'n' times before sleeping. | ||
9 | * See mutex.c for single-acquisition sleeping locks, which enforce | ||
10 | * rules that allow code to be debugged more easily. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * Some notes on the implementation: | ||
15 | * | ||
16 | * The spinlock controls access to the other members of the semaphore. | ||
17 | * down_trylock() and up() can be called from interrupt context, so we | ||
18 | * have to disable interrupts when taking the lock. It turns out various | ||
19 | * parts of the kernel expect to be able to use down() on a semaphore in | ||
20 | * interrupt context when they know it will succeed, so we have to use | ||
21 | * irqsave variants for down(), down_interruptible() and down_killable() | ||
22 | * too. | ||
23 | * | ||
24 | * The ->count variable represents how many more tasks can acquire this | ||
25 | * semaphore. If it's zero, there may be tasks waiting on the wait_list. | ||
26 | */ | ||
27 | |||
28 | #include <linux/compiler.h> | ||
29 | #include <linux/kernel.h> | ||
30 | #include <linux/export.h> | ||
31 | #include <linux/sched.h> | ||
32 | #include <linux/semaphore.h> | ||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/ftrace.h> | ||
35 | |||
36 | static noinline void __down(struct semaphore *sem); | ||
37 | static noinline int __down_interruptible(struct semaphore *sem); | ||
38 | static noinline int __down_killable(struct semaphore *sem); | ||
39 | static noinline int __down_timeout(struct semaphore *sem, long jiffies); | ||
40 | static noinline void __up(struct semaphore *sem); | ||
41 | |||
42 | /** | ||
43 | * down - acquire the semaphore | ||
44 | * @sem: the semaphore to be acquired | ||
45 | * | ||
46 | * Acquires the semaphore. If no more tasks are allowed to acquire the | ||
47 | * semaphore, calling this function will put the task to sleep until the | ||
48 | * semaphore is released. | ||
49 | * | ||
50 | * Use of this function is deprecated, please use down_interruptible() or | ||
51 | * down_killable() instead. | ||
52 | */ | ||
53 | void down(struct semaphore *sem) | ||
54 | { | ||
55 | unsigned long flags; | ||
56 | |||
57 | raw_spin_lock_irqsave(&sem->lock, flags); | ||
58 | if (likely(sem->count > 0)) | ||
59 | sem->count--; | ||
60 | else | ||
61 | __down(sem); | ||
62 | raw_spin_unlock_irqrestore(&sem->lock, flags); | ||
63 | } | ||
64 | EXPORT_SYMBOL(down); | ||
65 | |||
66 | /** | ||
67 | * down_interruptible - acquire the semaphore unless interrupted | ||
68 | * @sem: the semaphore to be acquired | ||
69 | * | ||
70 | * Attempts to acquire the semaphore. If no more tasks are allowed to | ||
71 | * acquire the semaphore, calling this function will put the task to sleep. | ||
72 | * If the sleep is interrupted by a signal, this function will return -EINTR. | ||
73 | * If the semaphore is successfully acquired, this function returns 0. | ||
74 | */ | ||
75 | int down_interruptible(struct semaphore *sem) | ||
76 | { | ||
77 | unsigned long flags; | ||
78 | int result = 0; | ||
79 | |||
80 | raw_spin_lock_irqsave(&sem->lock, flags); | ||
81 | if (likely(sem->count > 0)) | ||
82 | sem->count--; | ||
83 | else | ||
84 | result = __down_interruptible(sem); | ||
85 | raw_spin_unlock_irqrestore(&sem->lock, flags); | ||
86 | |||
87 | return result; | ||
88 | } | ||
89 | EXPORT_SYMBOL(down_interruptible); | ||
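A minimal caller-side sketch: the -EINTR case must be handled, since the semaphore is not held when it is returned (names below are illustrative):

static int example_wait_for_resource(struct semaphore *sem)
{
        if (down_interruptible(sem))
                return -EINTR;          /* interrupted, sem NOT held */
        /* ... use the resource ... */
        up(sem);
        return 0;
}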
90 | |||
91 | /** | ||
92 | * down_killable - acquire the semaphore unless killed | ||
93 | * @sem: the semaphore to be acquired | ||
94 | * | ||
95 | * Attempts to acquire the semaphore. If no more tasks are allowed to | ||
96 | * acquire the semaphore, calling this function will put the task to sleep. | ||
97 | * If the sleep is interrupted by a fatal signal, this function will return | ||
98 | * -EINTR. If the semaphore is successfully acquired, this function returns | ||
99 | * 0. | ||
100 | */ | ||
101 | int down_killable(struct semaphore *sem) | ||
102 | { | ||
103 | unsigned long flags; | ||
104 | int result = 0; | ||
105 | |||
106 | raw_spin_lock_irqsave(&sem->lock, flags); | ||
107 | if (likely(sem->count > 0)) | ||
108 | sem->count--; | ||
109 | else | ||
110 | result = __down_killable(sem); | ||
111 | raw_spin_unlock_irqrestore(&sem->lock, flags); | ||
112 | |||
113 | return result; | ||
114 | } | ||
115 | EXPORT_SYMBOL(down_killable); | ||
116 | |||
117 | /** | ||
118 | * down_trylock - try to acquire the semaphore, without waiting | ||
119 | * @sem: the semaphore to be acquired | ||
120 | * | ||
121 | * Try to acquire the semaphore atomically. Returns 0 if the semaphore has | ||
122 | * been acquired successfully or 1 if it cannot be acquired. | ||
123 | * | ||
124 | * NOTE: This return value is inverted from both spin_trylock and | ||
125 | * mutex_trylock! Be careful about this when converting code. | ||
126 | * | ||
127 | * Unlike mutex_trylock, this function can be used from interrupt context, | ||
128 | * and the semaphore can be released by any task or interrupt. | ||
129 | */ | ||
130 | int down_trylock(struct semaphore *sem) | ||
131 | { | ||
132 | unsigned long flags; | ||
133 | int count; | ||
134 | |||
135 | raw_spin_lock_irqsave(&sem->lock, flags); | ||
136 | count = sem->count - 1; | ||
137 | if (likely(count >= 0)) | ||
138 | sem->count = count; | ||
139 | raw_spin_unlock_irqrestore(&sem->lock, flags); | ||
140 | |||
141 | return (count < 0); | ||
142 | } | ||
143 | EXPORT_SYMBOL(down_trylock); | ||
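Because of the inverted convention warned about above, trylock callers test for non-zero to mean failure. A short sketch (illustrative only):

static int example_try(struct semaphore *sem)
{
        if (down_trylock(sem))          /* non-zero: did NOT acquire it */
                return 0;
        /* ... acquired; do the work ... */
        up(sem);
        return 1;
}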
144 | |||
145 | /** | ||
146 | * down_timeout - acquire the semaphore within a specified time | ||
147 | * @sem: the semaphore to be acquired | ||
148 | * @jiffies: how long to wait before failing | ||
149 | * | ||
150 | * Attempts to acquire the semaphore. If no more tasks are allowed to | ||
151 | * acquire the semaphore, calling this function will put the task to sleep. | ||
152 | * If the semaphore is not released within the specified number of jiffies, | ||
153 | * this function returns -ETIME. It returns 0 if the semaphore was acquired. | ||
154 | */ | ||
155 | int down_timeout(struct semaphore *sem, long jiffies) | ||
156 | { | ||
157 | unsigned long flags; | ||
158 | int result = 0; | ||
159 | |||
160 | raw_spin_lock_irqsave(&sem->lock, flags); | ||
161 | if (likely(sem->count > 0)) | ||
162 | sem->count--; | ||
163 | else | ||
164 | result = __down_timeout(sem, jiffies); | ||
165 | raw_spin_unlock_irqrestore(&sem->lock, flags); | ||
166 | |||
167 | return result; | ||
168 | } | ||
169 | EXPORT_SYMBOL(down_timeout); | ||
170 | |||
171 | /** | ||
172 | * up - release the semaphore | ||
173 | * @sem: the semaphore to release | ||
174 | * | ||
175 | * Release the semaphore. Unlike mutexes, up() may be called from any | ||
176 | * context and even by tasks which have never called down(). | ||
177 | */ | ||
178 | void up(struct semaphore *sem) | ||
179 | { | ||
180 | unsigned long flags; | ||
181 | |||
182 | raw_spin_lock_irqsave(&sem->lock, flags); | ||
183 | if (likely(list_empty(&sem->wait_list))) | ||
184 | sem->count++; | ||
185 | else | ||
186 | __up(sem); | ||
187 | raw_spin_unlock_irqrestore(&sem->lock, flags); | ||
188 | } | ||
189 | EXPORT_SYMBOL(up); | ||
190 | |||
191 | /* Functions for the contended case */ | ||
192 | |||
193 | struct semaphore_waiter { | ||
194 | struct list_head list; | ||
195 | struct task_struct *task; | ||
196 | bool up; | ||
197 | }; | ||
198 | |||
199 | /* | ||
200 | * Because this function is inlined, the 'state' parameter will be | ||
201 | * constant, and thus optimised away by the compiler. Likewise the | ||
202 | * 'timeout' parameter for the cases without timeouts. | ||
203 | */ | ||
204 | static inline int __sched __down_common(struct semaphore *sem, long state, | ||
205 | long timeout) | ||
206 | { | ||
207 | struct task_struct *task = current; | ||
208 | struct semaphore_waiter waiter; | ||
209 | |||
210 | list_add_tail(&waiter.list, &sem->wait_list); | ||
211 | waiter.task = task; | ||
212 | waiter.up = false; | ||
213 | |||
214 | for (;;) { | ||
215 | if (signal_pending_state(state, task)) | ||
216 | goto interrupted; | ||
217 | if (unlikely(timeout <= 0)) | ||
218 | goto timed_out; | ||
219 | __set_task_state(task, state); | ||
220 | raw_spin_unlock_irq(&sem->lock); | ||
221 | timeout = schedule_timeout(timeout); | ||
222 | raw_spin_lock_irq(&sem->lock); | ||
223 | if (waiter.up) | ||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | timed_out: | ||
228 | list_del(&waiter.list); | ||
229 | return -ETIME; | ||
230 | |||
231 | interrupted: | ||
232 | list_del(&waiter.list); | ||
233 | return -EINTR; | ||
234 | } | ||
235 | |||
236 | static noinline void __sched __down(struct semaphore *sem) | ||
237 | { | ||
238 | __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); | ||
239 | } | ||
240 | |||
241 | static noinline int __sched __down_interruptible(struct semaphore *sem) | ||
242 | { | ||
243 | return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); | ||
244 | } | ||
245 | |||
246 | static noinline int __sched __down_killable(struct semaphore *sem) | ||
247 | { | ||
248 | return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT); | ||
249 | } | ||
250 | |||
251 | static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) | ||
252 | { | ||
253 | return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); | ||
254 | } | ||
255 | |||
256 | static noinline void __sched __up(struct semaphore *sem) | ||
257 | { | ||
258 | struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, | ||
259 | struct semaphore_waiter, list); | ||
260 | list_del(&waiter->list); | ||
261 | waiter->up = true; | ||
262 | wake_up_process(waiter->task); | ||
263 | } | ||
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c new file mode 100644 index 000000000000..4b082b5cac9e --- /dev/null +++ b/kernel/locking/spinlock.c | |||
@@ -0,0 +1,399 @@ | |||
1 | /* | ||
2 | * Copyright (2004) Linus Torvalds | ||
3 | * | ||
4 | * Author: Zwane Mwaikambo <zwane@fsmlabs.com> | ||
5 | * | ||
6 | * Copyright (2004, 2005) Ingo Molnar | ||
7 | * | ||
8 | * This file contains the spinlock/rwlock implementations for the | ||
9 | * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them) | ||
10 | * | ||
11 | * Note that some architectures have special knowledge about the | ||
12 | * stack frames of these functions in their profile_pc. If you | ||
13 | * change anything significant here that could change the stack | ||
14 | * frame, contact the architecture maintainers. | ||
15 | */ | ||
16 | |||
17 | #include <linux/linkage.h> | ||
18 | #include <linux/preempt.h> | ||
19 | #include <linux/spinlock.h> | ||
20 | #include <linux/interrupt.h> | ||
21 | #include <linux/debug_locks.h> | ||
22 | #include <linux/export.h> | ||
23 | |||
24 | /* | ||
25 | * If lockdep is enabled then we use the non-preemption spin-ops | ||
26 | * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are | ||
27 | * not re-enabled during lock-acquire (which the preempt-spin-ops do): | ||
28 | */ | ||
29 | #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) | ||
30 | /* | ||
31 | * The __lock_function inlines are taken from | ||
32 | * include/linux/spinlock_api_smp.h | ||
33 | */ | ||
34 | #else | ||
35 | #define raw_read_can_lock(l) read_can_lock(l) | ||
36 | #define raw_write_can_lock(l) write_can_lock(l) | ||
37 | |||
38 | /* | ||
39 | * Some architectures can relax in favour of the CPU owning the lock. | ||
40 | */ | ||
41 | #ifndef arch_read_relax | ||
42 | # define arch_read_relax(l) cpu_relax() | ||
43 | #endif | ||
44 | #ifndef arch_write_relax | ||
45 | # define arch_write_relax(l) cpu_relax() | ||
46 | #endif | ||
47 | #ifndef arch_spin_relax | ||
48 | # define arch_spin_relax(l) cpu_relax() | ||
49 | #endif | ||
50 | |||
51 | /* | ||
52 | * We build the __lock_function inlines here. They are too large for | ||
53 | * inlining all over the place, but here there is only one user per | ||
54 | * function, which embeds them into the calling _lock_function below. | ||
55 | * | ||
56 | * This could be a long-held lock. We both prepare to spin for a long | ||
57 | * time (making _this_ CPU preemptable if possible), and we also signal | ||
58 | * towards that other CPU that it should break the lock ASAP. | ||
59 | */ | ||
60 | #define BUILD_LOCK_OPS(op, locktype) \ | ||
61 | void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ | ||
62 | { \ | ||
63 | for (;;) { \ | ||
64 | preempt_disable(); \ | ||
65 | if (likely(do_raw_##op##_trylock(lock))) \ | ||
66 | break; \ | ||
67 | preempt_enable(); \ | ||
68 | \ | ||
69 | if (!(lock)->break_lock) \ | ||
70 | (lock)->break_lock = 1; \ | ||
71 | while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\ | ||
72 | arch_##op##_relax(&lock->raw_lock); \ | ||
73 | } \ | ||
74 | (lock)->break_lock = 0; \ | ||
75 | } \ | ||
76 | \ | ||
77 | unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ | ||
78 | { \ | ||
79 | unsigned long flags; \ | ||
80 | \ | ||
81 | for (;;) { \ | ||
82 | preempt_disable(); \ | ||
83 | local_irq_save(flags); \ | ||
84 | if (likely(do_raw_##op##_trylock(lock))) \ | ||
85 | break; \ | ||
86 | local_irq_restore(flags); \ | ||
87 | preempt_enable(); \ | ||
88 | \ | ||
89 | if (!(lock)->break_lock) \ | ||
90 | (lock)->break_lock = 1; \ | ||
91 | while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\ | ||
92 | arch_##op##_relax(&lock->raw_lock); \ | ||
93 | } \ | ||
94 | (lock)->break_lock = 0; \ | ||
95 | return flags; \ | ||
96 | } \ | ||
97 | \ | ||
98 | void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \ | ||
99 | { \ | ||
100 | _raw_##op##_lock_irqsave(lock); \ | ||
101 | } \ | ||
102 | \ | ||
103 | void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ | ||
104 | { \ | ||
105 | unsigned long flags; \ | ||
106 | \ | ||
107 | /* */ \ | ||
108 | /* Careful: we must exclude softirqs too, hence the */ \ | ||
109 | /* irq-disabling. We use the generic preemption-aware */ \ | ||
110 | /* function: */ \ | ||
111 | /**/ \ | ||
112 | flags = _raw_##op##_lock_irqsave(lock); \ | ||
113 | local_bh_disable(); \ | ||
114 | local_irq_restore(flags); \ | ||
115 | } \ | ||
116 | |||
117 | /* | ||
118 | * Build preemption-friendly versions of the following | ||
119 | * lock-spinning functions: | ||
120 | * | ||
121 | * __[spin|read|write]_lock() | ||
122 | * __[spin|read|write]_lock_irq() | ||
123 | * __[spin|read|write]_lock_irqsave() | ||
124 | * __[spin|read|write]_lock_bh() | ||
125 | */ | ||
126 | BUILD_LOCK_OPS(spin, raw_spinlock); | ||
127 | BUILD_LOCK_OPS(read, rwlock); | ||
128 | BUILD_LOCK_OPS(write, rwlock); | ||
129 | |||
130 | #endif | ||
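To make the token pasting concrete, this is roughly what BUILD_LOCK_OPS(spin, raw_spinlock) expands to for the plain-lock case (a hand expansion for illustration; it is only built when CONFIG_GENERIC_LOCKBREAK is set and CONFIG_DEBUG_LOCK_ALLOC is not): the function trylocks with preemption disabled, and while waiting it re-enables preemption and sets break_lock so the current owner can drop the lock early.

void __lockfunc __raw_spin_lock(raw_spinlock_t *lock)
{
	for (;;) {
		preempt_disable();
		if (likely(do_raw_spin_trylock(lock)))
			break;				/* acquired: leave with preemption off */
		preempt_enable();			/* stay preemptible while spinning */

		if (!(lock)->break_lock)
			(lock)->break_lock = 1;		/* ask the owner to release ASAP */
		while (!raw_spin_can_lock(lock) && (lock)->break_lock)
			arch_spin_relax(&lock->raw_lock);
	}
	(lock)->break_lock = 0;
}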
131 | |||
132 | #ifndef CONFIG_INLINE_SPIN_TRYLOCK | ||
133 | int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock) | ||
134 | { | ||
135 | return __raw_spin_trylock(lock); | ||
136 | } | ||
137 | EXPORT_SYMBOL(_raw_spin_trylock); | ||
138 | #endif | ||
139 | |||
140 | #ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH | ||
141 | int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock) | ||
142 | { | ||
143 | return __raw_spin_trylock_bh(lock); | ||
144 | } | ||
145 | EXPORT_SYMBOL(_raw_spin_trylock_bh); | ||
146 | #endif | ||
147 | |||
148 | #ifndef CONFIG_INLINE_SPIN_LOCK | ||
149 | void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) | ||
150 | { | ||
151 | __raw_spin_lock(lock); | ||
152 | } | ||
153 | EXPORT_SYMBOL(_raw_spin_lock); | ||
154 | #endif | ||
155 | |||
156 | #ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE | ||
157 | unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock) | ||
158 | { | ||
159 | return __raw_spin_lock_irqsave(lock); | ||
160 | } | ||
161 | EXPORT_SYMBOL(_raw_spin_lock_irqsave); | ||
162 | #endif | ||
163 | |||
164 | #ifndef CONFIG_INLINE_SPIN_LOCK_IRQ | ||
165 | void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock) | ||
166 | { | ||
167 | __raw_spin_lock_irq(lock); | ||
168 | } | ||
169 | EXPORT_SYMBOL(_raw_spin_lock_irq); | ||
170 | #endif | ||
171 | |||
172 | #ifndef CONFIG_INLINE_SPIN_LOCK_BH | ||
173 | void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock) | ||
174 | { | ||
175 | __raw_spin_lock_bh(lock); | ||
176 | } | ||
177 | EXPORT_SYMBOL(_raw_spin_lock_bh); | ||
178 | #endif | ||
179 | |||
180 | #ifdef CONFIG_UNINLINE_SPIN_UNLOCK | ||
181 | void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) | ||
182 | { | ||
183 | __raw_spin_unlock(lock); | ||
184 | } | ||
185 | EXPORT_SYMBOL(_raw_spin_unlock); | ||
186 | #endif | ||
187 | |||
188 | #ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE | ||
189 | void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags) | ||
190 | { | ||
191 | __raw_spin_unlock_irqrestore(lock, flags); | ||
192 | } | ||
193 | EXPORT_SYMBOL(_raw_spin_unlock_irqrestore); | ||
194 | #endif | ||
195 | |||
196 | #ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ | ||
197 | void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock) | ||
198 | { | ||
199 | __raw_spin_unlock_irq(lock); | ||
200 | } | ||
201 | EXPORT_SYMBOL(_raw_spin_unlock_irq); | ||
202 | #endif | ||
203 | |||
204 | #ifndef CONFIG_INLINE_SPIN_UNLOCK_BH | ||
205 | void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) | ||
206 | { | ||
207 | __raw_spin_unlock_bh(lock); | ||
208 | } | ||
209 | EXPORT_SYMBOL(_raw_spin_unlock_bh); | ||
210 | #endif | ||
211 | |||
212 | #ifndef CONFIG_INLINE_READ_TRYLOCK | ||
213 | int __lockfunc _raw_read_trylock(rwlock_t *lock) | ||
214 | { | ||
215 | return __raw_read_trylock(lock); | ||
216 | } | ||
217 | EXPORT_SYMBOL(_raw_read_trylock); | ||
218 | #endif | ||
219 | |||
220 | #ifndef CONFIG_INLINE_READ_LOCK | ||
221 | void __lockfunc _raw_read_lock(rwlock_t *lock) | ||
222 | { | ||
223 | __raw_read_lock(lock); | ||
224 | } | ||
225 | EXPORT_SYMBOL(_raw_read_lock); | ||
226 | #endif | ||
227 | |||
228 | #ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE | ||
229 | unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock) | ||
230 | { | ||
231 | return __raw_read_lock_irqsave(lock); | ||
232 | } | ||
233 | EXPORT_SYMBOL(_raw_read_lock_irqsave); | ||
234 | #endif | ||
235 | |||
236 | #ifndef CONFIG_INLINE_READ_LOCK_IRQ | ||
237 | void __lockfunc _raw_read_lock_irq(rwlock_t *lock) | ||
238 | { | ||
239 | __raw_read_lock_irq(lock); | ||
240 | } | ||
241 | EXPORT_SYMBOL(_raw_read_lock_irq); | ||
242 | #endif | ||
243 | |||
244 | #ifndef CONFIG_INLINE_READ_LOCK_BH | ||
245 | void __lockfunc _raw_read_lock_bh(rwlock_t *lock) | ||
246 | { | ||
247 | __raw_read_lock_bh(lock); | ||
248 | } | ||
249 | EXPORT_SYMBOL(_raw_read_lock_bh); | ||
250 | #endif | ||
251 | |||
252 | #ifndef CONFIG_INLINE_READ_UNLOCK | ||
253 | void __lockfunc _raw_read_unlock(rwlock_t *lock) | ||
254 | { | ||
255 | __raw_read_unlock(lock); | ||
256 | } | ||
257 | EXPORT_SYMBOL(_raw_read_unlock); | ||
258 | #endif | ||
259 | |||
260 | #ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE | ||
261 | void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) | ||
262 | { | ||
263 | __raw_read_unlock_irqrestore(lock, flags); | ||
264 | } | ||
265 | EXPORT_SYMBOL(_raw_read_unlock_irqrestore); | ||
266 | #endif | ||
267 | |||
268 | #ifndef CONFIG_INLINE_READ_UNLOCK_IRQ | ||
269 | void __lockfunc _raw_read_unlock_irq(rwlock_t *lock) | ||
270 | { | ||
271 | __raw_read_unlock_irq(lock); | ||
272 | } | ||
273 | EXPORT_SYMBOL(_raw_read_unlock_irq); | ||
274 | #endif | ||
275 | |||
276 | #ifndef CONFIG_INLINE_READ_UNLOCK_BH | ||
277 | void __lockfunc _raw_read_unlock_bh(rwlock_t *lock) | ||
278 | { | ||
279 | __raw_read_unlock_bh(lock); | ||
280 | } | ||
281 | EXPORT_SYMBOL(_raw_read_unlock_bh); | ||
282 | #endif | ||
283 | |||
284 | #ifndef CONFIG_INLINE_WRITE_TRYLOCK | ||
285 | int __lockfunc _raw_write_trylock(rwlock_t *lock) | ||
286 | { | ||
287 | return __raw_write_trylock(lock); | ||
288 | } | ||
289 | EXPORT_SYMBOL(_raw_write_trylock); | ||
290 | #endif | ||
291 | |||
292 | #ifndef CONFIG_INLINE_WRITE_LOCK | ||
293 | void __lockfunc _raw_write_lock(rwlock_t *lock) | ||
294 | { | ||
295 | __raw_write_lock(lock); | ||
296 | } | ||
297 | EXPORT_SYMBOL(_raw_write_lock); | ||
298 | #endif | ||
299 | |||
300 | #ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE | ||
301 | unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock) | ||
302 | { | ||
303 | return __raw_write_lock_irqsave(lock); | ||
304 | } | ||
305 | EXPORT_SYMBOL(_raw_write_lock_irqsave); | ||
306 | #endif | ||
307 | |||
308 | #ifndef CONFIG_INLINE_WRITE_LOCK_IRQ | ||
309 | void __lockfunc _raw_write_lock_irq(rwlock_t *lock) | ||
310 | { | ||
311 | __raw_write_lock_irq(lock); | ||
312 | } | ||
313 | EXPORT_SYMBOL(_raw_write_lock_irq); | ||
314 | #endif | ||
315 | |||
316 | #ifndef CONFIG_INLINE_WRITE_LOCK_BH | ||
317 | void __lockfunc _raw_write_lock_bh(rwlock_t *lock) | ||
318 | { | ||
319 | __raw_write_lock_bh(lock); | ||
320 | } | ||
321 | EXPORT_SYMBOL(_raw_write_lock_bh); | ||
322 | #endif | ||
323 | |||
324 | #ifndef CONFIG_INLINE_WRITE_UNLOCK | ||
325 | void __lockfunc _raw_write_unlock(rwlock_t *lock) | ||
326 | { | ||
327 | __raw_write_unlock(lock); | ||
328 | } | ||
329 | EXPORT_SYMBOL(_raw_write_unlock); | ||
330 | #endif | ||
331 | |||
332 | #ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE | ||
333 | void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) | ||
334 | { | ||
335 | __raw_write_unlock_irqrestore(lock, flags); | ||
336 | } | ||
337 | EXPORT_SYMBOL(_raw_write_unlock_irqrestore); | ||
338 | #endif | ||
339 | |||
340 | #ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ | ||
341 | void __lockfunc _raw_write_unlock_irq(rwlock_t *lock) | ||
342 | { | ||
343 | __raw_write_unlock_irq(lock); | ||
344 | } | ||
345 | EXPORT_SYMBOL(_raw_write_unlock_irq); | ||
346 | #endif | ||
347 | |||
348 | #ifndef CONFIG_INLINE_WRITE_UNLOCK_BH | ||
349 | void __lockfunc _raw_write_unlock_bh(rwlock_t *lock) | ||
350 | { | ||
351 | __raw_write_unlock_bh(lock); | ||
352 | } | ||
353 | EXPORT_SYMBOL(_raw_write_unlock_bh); | ||
354 | #endif | ||
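The out-of-line _raw_* functions above are what the generic spin_lock()/read_lock()/write_lock() families resolve to whenever the corresponding CONFIG_INLINE_* option is disabled. A hedged caller-side sketch (demo_lock and demo_count are illustrative only):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);	/* hypothetical */
static unsigned long demo_count;

static void demo_update(void)
{
	unsigned long flags;

	/* With CONFIG_INLINE_SPIN_LOCK_IRQSAVE unset, this call reaches
	 * _raw_spin_lock_irqsave() above via the spinlock API headers. */
	spin_lock_irqsave(&demo_lock, flags);
	demo_count++;
	spin_unlock_irqrestore(&demo_lock, flags);
}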
355 | |||
356 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
357 | |||
358 | void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) | ||
359 | { | ||
360 | preempt_disable(); | ||
361 | spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | ||
362 | LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); | ||
363 | } | ||
364 | EXPORT_SYMBOL(_raw_spin_lock_nested); | ||
365 | |||
366 | unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, | ||
367 | int subclass) | ||
368 | { | ||
369 | unsigned long flags; | ||
370 | |||
371 | local_irq_save(flags); | ||
372 | preempt_disable(); | ||
373 | spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | ||
374 | LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock, | ||
375 | do_raw_spin_lock_flags, &flags); | ||
376 | return flags; | ||
377 | } | ||
378 | EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested); | ||
379 | |||
380 | void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock, | ||
381 | struct lockdep_map *nest_lock) | ||
382 | { | ||
383 | preempt_disable(); | ||
384 | spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); | ||
385 | LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); | ||
386 | } | ||
387 | EXPORT_SYMBOL(_raw_spin_lock_nest_lock); | ||
388 | |||
389 | #endif | ||
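The _nested and _nest_lock entry points exist so that code which legitimately takes two locks of the same lockdep class can annotate the second acquisition instead of triggering a false deadlock report. A hedged sketch of the usual caller-side pattern (struct demo_node and demo_swap() are hypothetical):

#include <linux/kernel.h>
#include <linux/spinlock.h>

struct demo_node {
	spinlock_t lock;
	int value;
};

static void demo_swap(struct demo_node *a, struct demo_node *b)
{
	/* Impose a fixed (address) order, then tell lockdep that the second
	 * lock of the same class is a deliberate nesting. */
	if (a > b)
		swap(a, b);
	spin_lock(&a->lock);
	spin_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
	swap(a->value, b->value);
	spin_unlock(&b->lock);
	spin_unlock(&a->lock);
}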
390 | |||
391 | notrace int in_lock_functions(unsigned long addr) | ||
392 | { | ||
393 | /* Linker adds these: start and end of __lockfunc functions */ | ||
394 | extern char __lock_text_start[], __lock_text_end[]; | ||
395 | |||
396 | return addr >= (unsigned long)__lock_text_start | ||
397 | && addr < (unsigned long)__lock_text_end; | ||
398 | } | ||
399 | EXPORT_SYMBOL(in_lock_functions); | ||
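in_lock_functions() lets profiling code such as an architecture's profile_pc() recognise program counters that landed inside the __lockfunc text and attribute the sample to the caller instead. A minimal hedged sketch (the demo helper is illustrative; the extern declaration mirrors the definition above):

extern int in_lock_functions(unsigned long addr);	/* defined above */

/* Return nonzero if a sampled program counter fell inside one of the
 * out-of-line lock functions between __lock_text_start and __lock_text_end. */
static int demo_pc_in_lock_code(unsigned long pc)
{
	return in_lock_functions(pc);
}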
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c new file mode 100644 index 000000000000..0374a596cffa --- /dev/null +++ b/kernel/locking/spinlock_debug.c | |||
@@ -0,0 +1,302 @@ | |||
1 | /* | ||
2 | * Copyright 2005, Red Hat, Inc., Ingo Molnar | ||
3 | * Released under the General Public License (GPL). | ||
4 | * | ||
5 | * This file contains the spinlock/rwlock implementations for | ||
6 | * DEBUG_SPINLOCK. | ||
7 | */ | ||
8 | |||
9 | #include <linux/spinlock.h> | ||
10 | #include <linux/nmi.h> | ||
11 | #include <linux/interrupt.h> | ||
12 | #include <linux/debug_locks.h> | ||
13 | #include <linux/delay.h> | ||
14 | #include <linux/export.h> | ||
15 | |||
16 | void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, | ||
17 | struct lock_class_key *key) | ||
18 | { | ||
19 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
20 | /* | ||
21 | * Make sure we are not reinitializing a held lock: | ||
22 | */ | ||
23 | debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | ||
24 | lockdep_init_map(&lock->dep_map, name, key, 0); | ||
25 | #endif | ||
26 | lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | ||
27 | lock->magic = SPINLOCK_MAGIC; | ||
28 | lock->owner = SPINLOCK_OWNER_INIT; | ||
29 | lock->owner_cpu = -1; | ||
30 | } | ||
31 | |||
32 | EXPORT_SYMBOL(__raw_spin_lock_init); | ||
33 | |||
34 | void __rwlock_init(rwlock_t *lock, const char *name, | ||
35 | struct lock_class_key *key) | ||
36 | { | ||
37 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
38 | /* | ||
39 | * Make sure we are not reinitializing a held lock: | ||
40 | */ | ||
41 | debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | ||
42 | lockdep_init_map(&lock->dep_map, name, key, 0); | ||
43 | #endif | ||
44 | lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED; | ||
45 | lock->magic = RWLOCK_MAGIC; | ||
46 | lock->owner = SPINLOCK_OWNER_INIT; | ||
47 | lock->owner_cpu = -1; | ||
48 | } | ||
49 | |||
50 | EXPORT_SYMBOL(__rwlock_init); | ||
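Both init helpers take a name and a lock_class_key so lockdep can give every initialisation site its own lock class. Under CONFIG_DEBUG_SPINLOCK the spin_lock_init()-style macros in the headers supply those arguments roughly as follows (an approximate re-statement for illustration, not a quote of the headers):

/* Approximate shape: one static key per call site keys the lockdep class,
 * and the stringified lock name becomes the lock's printable name. */
#define demo_raw_spin_lock_init(lock)				\
do {								\
	static struct lock_class_key __key;			\
	__raw_spin_lock_init((lock), #lock, &__key);		\
} while (0)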
51 | |||
52 | static void spin_dump(raw_spinlock_t *lock, const char *msg) | ||
53 | { | ||
54 | struct task_struct *owner = NULL; | ||
55 | |||
56 | if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) | ||
57 | owner = lock->owner; | ||
58 | printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", | ||
59 | msg, raw_smp_processor_id(), | ||
60 | current->comm, task_pid_nr(current)); | ||
61 | printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, " | ||
62 | ".owner_cpu: %d\n", | ||
63 | lock, lock->magic, | ||
64 | owner ? owner->comm : "<none>", | ||
65 | owner ? task_pid_nr(owner) : -1, | ||
66 | lock->owner_cpu); | ||
67 | dump_stack(); | ||
68 | } | ||
69 | |||
70 | static void spin_bug(raw_spinlock_t *lock, const char *msg) | ||
71 | { | ||
72 | if (!debug_locks_off()) | ||
73 | return; | ||
74 | |||
75 | spin_dump(lock, msg); | ||
76 | } | ||
77 | |||
78 | #define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) | ||
79 | |||
80 | static inline void | ||
81 | debug_spin_lock_before(raw_spinlock_t *lock) | ||
82 | { | ||
83 | SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); | ||
84 | SPIN_BUG_ON(lock->owner == current, lock, "recursion"); | ||
85 | SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), | ||
86 | lock, "cpu recursion"); | ||
87 | } | ||
88 | |||
89 | static inline void debug_spin_lock_after(raw_spinlock_t *lock) | ||
90 | { | ||
91 | lock->owner_cpu = raw_smp_processor_id(); | ||
92 | lock->owner = current; | ||
93 | } | ||
94 | |||
95 | static inline void debug_spin_unlock(raw_spinlock_t *lock) | ||
96 | { | ||
97 | SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); | ||
98 | SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked"); | ||
99 | SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); | ||
100 | SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), | ||
101 | lock, "wrong CPU"); | ||
102 | lock->owner = SPINLOCK_OWNER_INIT; | ||
103 | lock->owner_cpu = -1; | ||
104 | } | ||
105 | |||
106 | static void __spin_lock_debug(raw_spinlock_t *lock) | ||
107 | { | ||
108 | u64 i; | ||
109 | u64 loops = loops_per_jiffy * HZ; | ||
110 | |||
111 | for (i = 0; i < loops; i++) { | ||
112 | if (arch_spin_trylock(&lock->raw_lock)) | ||
113 | return; | ||
114 | __delay(1); | ||
115 | } | ||
116 | /* lockup suspected: */ | ||
117 | spin_dump(lock, "lockup suspected"); | ||
118 | #ifdef CONFIG_SMP | ||
119 | trigger_all_cpu_backtrace(); | ||
120 | #endif | ||
121 | |||
122 | /* | ||
123 | * The trylock above was causing a livelock. Give the lower level arch | ||
124 | * specific lock code a chance to acquire the lock. We have already | ||
125 | * printed a warning/backtrace at this point. The non-debug arch | ||
126 | * specific code might actually succeed in acquiring the lock. If it is | ||
127 | * not successful, the end-result is the same - there is no forward | ||
128 | * progress. | ||
129 | */ | ||
130 | arch_spin_lock(&lock->raw_lock); | ||
131 | } | ||
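The polling bound used above amounts to roughly one second of busy-waiting: loops_per_jiffy calibrated __delay() iterations per jiffy, times HZ jiffies per second, with each pass burning __delay(1). A hedged helper restating that arithmetic (demo_* is illustrative):

#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/types.h>

/* loops_per_jiffy ~ __delay() iterations per jiffy, HZ ~ jiffies per second,
 * so this is about one second's worth of __delay(1) calls before
 * "lockup suspected" is reported. */
static inline u64 demo_lockup_poll_iterations(void)
{
	return (u64)loops_per_jiffy * HZ;
}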
132 | |||
133 | void do_raw_spin_lock(raw_spinlock_t *lock) | ||
134 | { | ||
135 | debug_spin_lock_before(lock); | ||
136 | if (unlikely(!arch_spin_trylock(&lock->raw_lock))) | ||
137 | __spin_lock_debug(lock); | ||
138 | debug_spin_lock_after(lock); | ||
139 | } | ||
140 | |||
141 | int do_raw_spin_trylock(raw_spinlock_t *lock) | ||
142 | { | ||
143 | int ret = arch_spin_trylock(&lock->raw_lock); | ||
144 | |||
145 | if (ret) | ||
146 | debug_spin_lock_after(lock); | ||
147 | #ifndef CONFIG_SMP | ||
148 | /* | ||
149 | * Must not happen on UP: | ||
150 | */ | ||
151 | SPIN_BUG_ON(!ret, lock, "trylock failure on UP"); | ||
152 | #endif | ||
153 | return ret; | ||
154 | } | ||
155 | |||
156 | void do_raw_spin_unlock(raw_spinlock_t *lock) | ||
157 | { | ||
158 | debug_spin_unlock(lock); | ||
159 | arch_spin_unlock(&lock->raw_lock); | ||
160 | } | ||
161 | |||
162 | static void rwlock_bug(rwlock_t *lock, const char *msg) | ||
163 | { | ||
164 | if (!debug_locks_off()) | ||
165 | return; | ||
166 | |||
167 | printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n", | ||
168 | msg, raw_smp_processor_id(), current->comm, | ||
169 | task_pid_nr(current), lock); | ||
170 | dump_stack(); | ||
171 | } | ||
172 | |||
173 | #define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) | ||
174 | |||
175 | #if 0 /* __write_lock_debug() can lock up - maybe this can too? */ | ||
176 | static void __read_lock_debug(rwlock_t *lock) | ||
177 | { | ||
178 | u64 i; | ||
179 | u64 loops = loops_per_jiffy * HZ; | ||
180 | int print_once = 1; | ||
181 | |||
182 | for (;;) { | ||
183 | for (i = 0; i < loops; i++) { | ||
184 | if (arch_read_trylock(&lock->raw_lock)) | ||
185 | return; | ||
186 | __delay(1); | ||
187 | } | ||
188 | /* lockup suspected: */ | ||
189 | if (print_once) { | ||
190 | print_once = 0; | ||
191 | printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, " | ||
192 | "%s/%d, %p\n", | ||
193 | raw_smp_processor_id(), current->comm, | ||
194 | current->pid, lock); | ||
195 | dump_stack(); | ||
196 | } | ||
197 | } | ||
198 | } | ||
199 | #endif | ||
200 | |||
201 | void do_raw_read_lock(rwlock_t *lock) | ||
202 | { | ||
203 | RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); | ||
204 | arch_read_lock(&lock->raw_lock); | ||
205 | } | ||
206 | |||
207 | int do_raw_read_trylock(rwlock_t *lock) | ||
208 | { | ||
209 | int ret = arch_read_trylock(&lock->raw_lock); | ||
210 | |||
211 | #ifndef CONFIG_SMP | ||
212 | /* | ||
213 | * Must not happen on UP: | ||
214 | */ | ||
215 | RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP"); | ||
216 | #endif | ||
217 | return ret; | ||
218 | } | ||
219 | |||
220 | void do_raw_read_unlock(rwlock_t *lock) | ||
221 | { | ||
222 | RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); | ||
223 | arch_read_unlock(&lock->raw_lock); | ||
224 | } | ||
225 | |||
226 | static inline void debug_write_lock_before(rwlock_t *lock) | ||
227 | { | ||
228 | RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); | ||
229 | RWLOCK_BUG_ON(lock->owner == current, lock, "recursion"); | ||
230 | RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), | ||
231 | lock, "cpu recursion"); | ||
232 | } | ||
233 | |||
234 | static inline void debug_write_lock_after(rwlock_t *lock) | ||
235 | { | ||
236 | lock->owner_cpu = raw_smp_processor_id(); | ||
237 | lock->owner = current; | ||
238 | } | ||
239 | |||
240 | static inline void debug_write_unlock(rwlock_t *lock) | ||
241 | { | ||
242 | RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); | ||
243 | RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner"); | ||
244 | RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), | ||
245 | lock, "wrong CPU"); | ||
246 | lock->owner = SPINLOCK_OWNER_INIT; | ||
247 | lock->owner_cpu = -1; | ||
248 | } | ||
249 | |||
250 | #if 0 /* This can cause lockups */ | ||
251 | static void __write_lock_debug(rwlock_t *lock) | ||
252 | { | ||
253 | u64 i; | ||
254 | u64 loops = loops_per_jiffy * HZ; | ||
255 | int print_once = 1; | ||
256 | |||
257 | for (;;) { | ||
258 | for (i = 0; i < loops; i++) { | ||
259 | if (arch_write_trylock(&lock->raw_lock)) | ||
260 | return; | ||
261 | __delay(1); | ||
262 | } | ||
263 | /* lockup suspected: */ | ||
264 | if (print_once) { | ||
265 | print_once = 0; | ||
266 | printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, " | ||
267 | "%s/%d, %p\n", | ||
268 | raw_smp_processor_id(), current->comm, | ||
269 | current->pid, lock); | ||
270 | dump_stack(); | ||
271 | } | ||
272 | } | ||
273 | } | ||
274 | #endif | ||
275 | |||
276 | void do_raw_write_lock(rwlock_t *lock) | ||
277 | { | ||
278 | debug_write_lock_before(lock); | ||
279 | arch_write_lock(&lock->raw_lock); | ||
280 | debug_write_lock_after(lock); | ||
281 | } | ||
282 | |||
283 | int do_raw_write_trylock(rwlock_t *lock) | ||
284 | { | ||
285 | int ret = arch_write_trylock(&lock->raw_lock); | ||
286 | |||
287 | if (ret) | ||
288 | debug_write_lock_after(lock); | ||
289 | #ifndef CONFIG_SMP | ||
290 | /* | ||
291 | * Must not happen on UP: | ||
292 | */ | ||
293 | RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP"); | ||
294 | #endif | ||
295 | return ret; | ||
296 | } | ||
297 | |||
298 | void do_raw_write_unlock(rwlock_t *lock) | ||
299 | { | ||
300 | debug_write_unlock(lock); | ||
301 | arch_write_unlock(&lock->raw_lock); | ||
302 | } | ||
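Finally, a hedged sketch of the sort of misuse these checks are meant to catch (demo_lock and the function are illustrative): with CONFIG_DEBUG_SPINLOCK enabled, the second unlock below is flagged by debug_spin_unlock() ("already unlocked" / "wrong owner") and spin_dump() prints a backtrace instead of the error passing silently.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);	/* hypothetical */

static void demo_double_unlock(void)
{
	spin_lock(&demo_lock);
	spin_unlock(&demo_lock);
	/* BUG: unbalanced unlock -- caught by the SPIN_BUG_ON()s above. */
	spin_unlock(&demo_lock);
}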