Diffstat (limited to 'kernel/locking')
-rw-r--r--  kernel/locking/lockdep.c             | 116
-rw-r--r--  kernel/locking/lockdep_internals.h   |  27
-rw-r--r--  kernel/locking/lockdep_proc.c        |   2
-rw-r--r--  kernel/locking/qspinlock.c           | 143
-rw-r--r--  kernel/locking/qspinlock_paravirt.h  |   4
-rw-r--r--  kernel/locking/qspinlock_stat.h      |   6
-rw-r--r--  kernel/locking/rtmutex.c             |   4
-rw-r--r--  kernel/locking/rwsem-xadd.c          |  15
-rw-r--r--  kernel/locking/rwsem.c               |   7
-rw-r--r--  kernel/locking/rwsem.h               |  95
10 files changed, 252 insertions, 167 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index dd13f865ad40..1efada2dd9dd 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -138,7 +138,7 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
  * get freed - this significantly simplifies the debugging code.
  */
 unsigned long nr_lock_classes;
-static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+struct lock_class lock_classes[MAX_LOCKDEP_KEYS];

 static inline struct lock_class *hlock_class(struct held_lock *hlock)
 {
@@ -1391,7 +1391,9 @@ static void print_lock_class_header(struct lock_class *class, int depth)

 	printk("%*s->", depth, "");
 	print_lock_name(class);
-	printk(KERN_CONT " ops: %lu", class->ops);
+#ifdef CONFIG_DEBUG_LOCKDEP
+	printk(KERN_CONT " ops: %lu", debug_class_ops_read(class));
+#endif
 	printk(KERN_CONT " {\n");

 	for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
@@ -2148,76 +2150,6 @@ static int check_no_collision(struct task_struct *curr,
 }

 /*
- * This is for building a chain between just two different classes,
- * instead of adding a new hlock upon current, which is done by
- * add_chain_cache().
- *
- * This can be called in any context with two classes, while
- * add_chain_cache() must be done within the lock owener's context
- * since it uses hlock which might be racy in another context.
- */
-static inline int add_chain_cache_classes(unsigned int prev,
-					   unsigned int next,
-					   unsigned int irq_context,
-					   u64 chain_key)
-{
-	struct hlist_head *hash_head = chainhashentry(chain_key);
-	struct lock_chain *chain;
-
-	/*
-	 * Allocate a new chain entry from the static array, and add
-	 * it to the hash:
-	 */
-
-	/*
-	 * We might need to take the graph lock, ensure we've got IRQs
-	 * disabled to make this an IRQ-safe lock.. for recursion reasons
-	 * lockdep won't complain about its own locking errors.
-	 */
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
-		return 0;
-
-	if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
-		if (!debug_locks_off_graph_unlock())
-			return 0;
-
-		print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
-		dump_stack();
-		return 0;
-	}
-
-	chain = lock_chains + nr_lock_chains++;
-	chain->chain_key = chain_key;
-	chain->irq_context = irq_context;
-	chain->depth = 2;
-	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
-		chain->base = nr_chain_hlocks;
-		nr_chain_hlocks += chain->depth;
-		chain_hlocks[chain->base] = prev - 1;
-		chain_hlocks[chain->base + 1] = next -1;
-	}
-#ifdef CONFIG_DEBUG_LOCKDEP
-	/*
-	 * Important for check_no_collision().
-	 */
-	else {
-		if (!debug_locks_off_graph_unlock())
-			return 0;
-
-		print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
-		dump_stack();
-		return 0;
-	}
-#endif
-
-	hlist_add_head_rcu(&chain->entry, hash_head);
-	debug_atomic_inc(chain_lookup_misses);
-	inc_chains();
-
-	return 1;
-}
-
-/*
  * Adds a dependency chain into chain hashtable. And must be called with
  * graph_lock held.
  *
@@ -3262,6 +3194,10 @@ static int __lock_is_held(const struct lockdep_map *lock, int read);
 /*
  * This gets called for every mutex_lock*()/spin_lock*() operation.
  * We maintain the dependency maps and validate the locking attempt:
+ *
+ * The callers must make sure that IRQs are disabled before calling it,
+ * otherwise we could get an interrupt which would want to take locks,
+ * which would end up in lockdep again.
  */
 static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 			  int trylock, int read, int check, int hardirqs_off,
@@ -3279,14 +3215,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	if (unlikely(!debug_locks))
 		return 0;

-	/*
-	 * Lockdep should run with IRQs disabled, otherwise we could
-	 * get an interrupt which would want to take locks, which would
-	 * end up in lockdep and have you got a head-ache already?
-	 */
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
-		return 0;
-
 	if (!prove_locking || lock->key == &__lockdep_no_validate__)
 		check = 0;

@@ -3300,7 +3228,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		if (!class)
 			return 0;
 	}
-	atomic_inc((atomic_t *)&class->ops);
+
+	debug_class_ops_inc(class);
+
 	if (very_verbose(class)) {
 		printk("\nacquire class [%px] %s", class->key, class->name);
 		if (class->name_version > 1)
@@ -3543,6 +3473,9 @@ static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
 {
 	struct held_lock *hlock;

+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return 0;
+
 	for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) {
 		if (!__lock_acquire(hlock->instance,
 				    hlock_class(hlock)->subclass,
@@ -3696,6 +3629,13 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
 	curr->lockdep_depth = i;
 	curr->curr_chain_key = hlock->prev_chain_key;

+	/*
+	 * The most likely case is when the unlock is on the innermost
+	 * lock. In this case, we are done!
+	 */
+	if (i == depth-1)
+		return 1;
+
 	if (reacquire_held_locks(curr, depth, i + 1))
 		return 0;

@@ -3703,10 +3643,14 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
 	 * We had N bottles of beer on the wall, we drank one, but now
 	 * there's not N-1 bottles of beer left on the wall...
 	 */
-	if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
-		return 0;
+	DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth-1);

-	return 1;
+	/*
+	 * Since reacquire_held_locks() would have called check_chain_key()
+	 * indirectly via __lock_acquire(), we don't need to do it again
+	 * on return.
+	 */
+	return 0;
 }

 static int __lock_is_held(const struct lockdep_map *lock, int read)
@@ -4122,7 +4066,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;

-	if (unlikely(!lock_stat))
+	if (unlikely(!lock_stat || !debug_locks))
 		return;

 	if (unlikely(current->lockdep_recursion))
@@ -4142,7 +4086,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;

-	if (unlikely(!lock_stat))
+	if (unlikely(!lock_stat || !debug_locks))
 		return;

 	if (unlikely(current->lockdep_recursion))
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index d459d624ba2a..88c847a41c8a 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -152,9 +152,15 @@ struct lockdep_stats {
 	int	nr_find_usage_forwards_recursions;
 	int	nr_find_usage_backwards_checks;
 	int	nr_find_usage_backwards_recursions;
+
+	/*
+	 * Per lock class locking operation stat counts
+	 */
+	unsigned long lock_class_ops[MAX_LOCKDEP_KEYS];
 };

 DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
+extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS];

 #define __debug_atomic_inc(ptr)				\
 	this_cpu_inc(lockdep_stats.ptr);
@@ -179,9 +185,30 @@ DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
 	}						\
 	__total;					\
 })
+
+static inline void debug_class_ops_inc(struct lock_class *class)
+{
+	int idx;
+
+	idx = class - lock_classes;
+	__debug_atomic_inc(lock_class_ops[idx]);
+}
+
+static inline unsigned long debug_class_ops_read(struct lock_class *class)
+{
+	int idx, cpu;
+	unsigned long ops = 0;
+
+	idx = class - lock_classes;
+	for_each_possible_cpu(cpu)
+		ops += per_cpu(lockdep_stats.lock_class_ops[idx], cpu);
+	return ops;
+}
+
 #else
 # define __debug_atomic_inc(ptr)	do { } while (0)
 # define debug_atomic_inc(ptr)		do { } while (0)
 # define debug_atomic_dec(ptr)		do { } while (0)
 # define debug_atomic_read(ptr)		0
+# define debug_class_ops_inc(ptr)	do { } while (0)
 #endif
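
For illustration only (not part of the patch): a minimal user-space sketch of the per-CPU counting pattern that the new debug_class_ops_inc()/debug_class_ops_read() helpers follow, with plain arrays and made-up NR_CPUS/MAX_KEYS values standing in for the kernel's per-CPU machinery.

/*
 * Illustrative sketch: each "CPU" keeps its own counter slot per lock
 * class; an increment touches only the local slot, and a read sums all
 * slots, mirroring the helpers added above. NR_CPUS and MAX_KEYS are
 * made-up values for this example.
 */
#include <stdio.h>

#define NR_CPUS   4
#define MAX_KEYS  8

static unsigned long class_ops[NR_CPUS][MAX_KEYS];

static void class_ops_inc(int cpu, int class_idx)
{
	class_ops[cpu][class_idx]++;	/* lockless: only this CPU writes its slot */
}

static unsigned long class_ops_read(int class_idx)
{
	unsigned long sum = 0;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)	/* sum over all CPUs at read time */
		sum += class_ops[cpu][class_idx];
	return sum;
}

int main(void)
{
	class_ops_inc(0, 3);
	class_ops_inc(2, 3);
	printf("class 3 ops: %lu\n", class_ops_read(3));	/* prints 2 */
	return 0;
}

The point of the pattern is the same as in the patch: the hot path (the increment) stays cheap and contention-free, while the rare read path in /proc/lockdep pays the cost of summing over CPUs.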
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 3dd980dfba2d..3d31f9b0059e 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -68,7 +68,7 @@ static int l_show(struct seq_file *m, void *v)

 	seq_printf(m, "%p", class->key);
 #ifdef CONFIG_DEBUG_LOCKDEP
-	seq_printf(m, " OPS:%8ld", class->ops);
+	seq_printf(m, " OPS:%8ld", debug_class_ops_read(class));
 #endif
 #ifdef CONFIG_PROVE_LOCKING
 	seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class));
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index bfaeb05123ff..8a8c3c208c5e 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -74,12 +74,24 @@
  */

 #include "mcs_spinlock.h"
+#define MAX_NODES	4

+/*
+ * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * size and four of them will fit nicely in one 64-byte cacheline. For
+ * pvqspinlock, however, we need more space for extra data. To accommodate
+ * that, we insert two more long words to pad it up to 32 bytes. IOW, only
+ * two of them can fit in a cacheline in this case. That is OK as it is rare
+ * to have more than 2 levels of slowpath nesting in actual use. We don't
+ * want to penalize pvqspinlocks to optimize for a rare case in native
+ * qspinlocks.
+ */
+struct qnode {
+	struct mcs_spinlock mcs;
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define MAX_NODES	8
-#else
-#define MAX_NODES	4
+	long reserved[2];
 #endif
+};

 /*
  * The pending bit spinning loop count.
@@ -101,7 +113,7 @@
  *
  * PV doubles the storage and uses the second cacheline for PV state.
  */
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
+static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);

 /*
  * We must be able to distinguish between no-tail and the tail at 0:0,
@@ -126,7 +138,13 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
 	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
 	int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

-	return per_cpu_ptr(&mcs_nodes[idx], cpu);
+	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
+}
+
+static inline __pure
+struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
+{
+	return &((struct qnode *)base + idx)->mcs;
 }

 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
@@ -232,6 +250,20 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 #endif /* _Q_PENDING_BITS == 8 */

 /**
+ * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
+ * @lock : Pointer to queued spinlock structure
+ * Return: The previous lock value
+ *
+ * *,*,* -> *,1,*
+ */
+#ifndef queued_fetch_set_pending_acquire
+static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
+{
+	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
+}
+#endif
+
+/**
  * set_locked - Set the lock bit and own the lock
  * @lock: Pointer to queued spinlock structure
  *
@@ -326,43 +358,48 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	/*
 	 * trylock || pending
 	 *
-	 * 0,0,0 -> 0,0,1 ; trylock
-	 * 0,0,1 -> 0,1,1 ; pending
+	 * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
 	 */
-	val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
-	if (!(val & ~_Q_LOCKED_MASK)) {
-		/*
-		 * We're pending, wait for the owner to go away.
-		 *
-		 * *,1,1 -> *,1,0
-		 *
-		 * this wait loop must be a load-acquire such that we match the
-		 * store-release that clears the locked bit and create lock
-		 * sequentiality; this is because not all
-		 * clear_pending_set_locked() implementations imply full
-		 * barriers.
-		 */
-		if (val & _Q_LOCKED_MASK) {
-			atomic_cond_read_acquire(&lock->val,
-						 !(VAL & _Q_LOCKED_MASK));
-		}
+	val = queued_fetch_set_pending_acquire(lock);

-		/*
-		 * take ownership and clear the pending bit.
-		 *
-		 * *,1,0 -> *,0,1
-		 */
-		clear_pending_set_locked(lock);
-		qstat_inc(qstat_lock_pending, true);
-		return;
+	/*
+	 * If we observe contention, there is a concurrent locker.
+	 *
+	 * Undo and queue; our setting of PENDING might have made the
+	 * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+	 * on @next to become !NULL.
+	 */
+	if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+		/* Undo PENDING if we set it. */
+		if (!(val & _Q_PENDING_MASK))
+			clear_pending(lock);
+
+		goto queue;
 	}

 	/*
-	 * If pending was clear but there are waiters in the queue, then
-	 * we need to undo our setting of pending before we queue ourselves.
+	 * We're pending, wait for the owner to go away.
+	 *
+	 * 0,1,1 -> 0,1,0
+	 *
+	 * this wait loop must be a load-acquire such that we match the
+	 * store-release that clears the locked bit and create lock
+	 * sequentiality; this is because not all
+	 * clear_pending_set_locked() implementations imply full
+	 * barriers.
 	 */
-	if (!(val & _Q_PENDING_MASK))
-		clear_pending(lock);
+	if (val & _Q_LOCKED_MASK)
+		atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));
+
+	/*
+	 * take ownership and clear the pending bit.
+	 *
+	 * 0,1,0 -> 0,0,1
+	 */
+	clear_pending_set_locked(lock);
+	qstat_inc(qstat_lock_pending, true);
+	return;

 	/*
 	 * End of pending bit optimistic spinning and beginning of MCS
@@ -371,11 +408,16 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 queue:
 	qstat_inc(qstat_lock_slowpath, true);
 pv_queue:
-	node = this_cpu_ptr(&mcs_nodes[0]);
+	node = this_cpu_ptr(&qnodes[0].mcs);
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);

-	node += idx;
+	node = grab_mcs_node(node, idx);
+
+	/*
+	 * Keep counts of non-zero index values:
+	 */
+	qstat_inc(qstat_lock_idx1 + idx - 1, idx);

 	/*
 	 * Ensure that we increment the head node->count before initialising
@@ -476,16 +518,25 @@ locked:
 	 */

 	/*
-	 * In the PV case we might already have _Q_LOCKED_VAL set.
+	 * In the PV case we might already have _Q_LOCKED_VAL set, because
+	 * of lock stealing; therefore we must also allow:
+	 *
+	 * n,0,1 -> 0,0,1
 	 *
-	 * The atomic_cond_read_acquire() call above has provided the
-	 * necessary acquire semantics required for locking.
+	 * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
+	 * above wait condition, therefore any concurrent setting of
+	 * PENDING will make the uncontended transition fail.
 	 */
-	if (((val & _Q_TAIL_MASK) == tail) &&
-	    atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
-		goto release; /* No contention */
+	if ((val & _Q_TAIL_MASK) == tail) {
+		if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+			goto release; /* No contention */
+	}

-	/* Either somebody is queued behind us or _Q_PENDING_VAL is set */
+	/*
+	 * Either somebody is queued behind us or _Q_PENDING_VAL got set
+	 * which will then detect the remaining tail and queue behind us
+	 * ensuring we'll see a @next.
+	 */
 	set_locked(lock);

 	/*
@@ -501,7 +552,7 @@ release:
 	/*
 	 * release the node
 	 */
-	__this_cpu_dec(mcs_nodes[0].count);
+	__this_cpu_dec(qnodes[0].mcs.count);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);

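
For illustration only (not part of the patch): a stand-alone sketch that checks the size arithmetic described in the new qnode comment above. The struct layouts here are rough stand-ins assumed to come out at 16 and 32 bytes on a typical 64-bit build.

/*
 * Illustrative sketch: rough stand-ins for the kernel structures, used to
 * verify the cacheline math in the qnode comment (4 MCS nodes vs. 2 padded
 * pvqspinlock nodes per 64-byte line on 64-bit).
 */
#include <stdio.h>

struct mcs_spinlock {			/* stand-in layout: pointer + two ints */
	struct mcs_spinlock *next;
	int locked;
	int count;
};					/* expected 16 bytes on 64-bit */

struct qnode {
	struct mcs_spinlock mcs;
	long reserved[2];		/* pvqspinlock padding from the patch */
};					/* expected 32 bytes on 64-bit */

int main(void)
{
	printf("mcs_spinlock: %zu bytes -> %zu per 64-byte cacheline\n",
	       sizeof(struct mcs_spinlock), 64 / sizeof(struct mcs_spinlock));
	printf("qnode:        %zu bytes -> %zu per 64-byte cacheline\n",
	       sizeof(struct qnode), 64 / sizeof(struct qnode));
	return 0;
}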
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 5a0cf5f9008c..0130e488ebfe 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -49,8 +49,6 @@ enum vcpu_state {

 struct pv_node {
 	struct mcs_spinlock	mcs;
-	struct mcs_spinlock	__res[3];
-
 	int			cpu;
 	u8			state;
 };
@@ -281,7 +279,7 @@ static void pv_init_node(struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;

-	BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock));
+	BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode));

 	pn->cpu = smp_processor_id();
 	pn->state = vcpu_running;
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 6bd78c0740fc..42d3d8dc8f49 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -55,6 +55,9 @@ enum qlock_stats {
 	qstat_pv_wait_node,
 	qstat_lock_pending,
 	qstat_lock_slowpath,
+	qstat_lock_idx1,
+	qstat_lock_idx2,
+	qstat_lock_idx3,
 	qstat_num,	/* Total number of statistical counters */
 	qstat_reset_cnts = qstat_num,
 };
@@ -82,6 +85,9 @@ static const char * const qstat_names[qstat_num + 1] = {
 	[qstat_pv_wait_node]       = "pv_wait_node",
 	[qstat_lock_pending]       = "lock_pending",
 	[qstat_lock_slowpath]      = "lock_slowpath",
+	[qstat_lock_idx1]          = "lock_index1",
+	[qstat_lock_idx2]          = "lock_index2",
+	[qstat_lock_idx3]          = "lock_index3",
 	[qstat_reset_cnts]         = "reset_counters",
 };

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 2823d4163a37..581edcc63c26 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1485,9 +1485,9 @@ void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
 	__rt_mutex_lock(lock, subclass);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
-#endif

-#ifndef CONFIG_DEBUG_LOCK_ALLOC
+#else /* !CONFIG_DEBUG_LOCK_ALLOC */
+
 /**
  * rt_mutex_lock - lock a rt_mutex
  *
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 3064c50e181e..09b180063ee1 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -180,7 +180,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 		 * but it gives the spinners an early indication that the
 		 * readers now have the lock.
 		 */
-		rwsem_set_reader_owned(sem);
+		__rwsem_set_reader_owned(sem, waiter->task);
 	}

 	/*
@@ -233,8 +233,19 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
 	waiter.type = RWSEM_WAITING_FOR_READ;

 	raw_spin_lock_irq(&sem->wait_lock);
-	if (list_empty(&sem->wait_list))
+	if (list_empty(&sem->wait_list)) {
+		/*
+		 * In case the wait queue is empty and the lock isn't owned
+		 * by a writer, this reader can exit the slowpath and return
+		 * immediately as its RWSEM_ACTIVE_READ_BIAS has already
+		 * been set in the count.
+		 */
+		if (atomic_long_read(&sem->count) >= 0) {
+			raw_spin_unlock_irq(&sem->wait_lock);
+			return sem;
+		}
 		adjustment += RWSEM_WAITING_BIAS;
+	}
 	list_add_tail(&waiter.list, &sem->wait_list);

 	/* we're now waiting on the lock, but no longer actively locking */
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 776308d2fa9e..e586f0d03ad3 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -117,8 +117,9 @@ EXPORT_SYMBOL(down_write_trylock);
 void up_read(struct rw_semaphore *sem)
 {
 	rwsem_release(&sem->dep_map, 1, _RET_IP_);
-	DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);
+	DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));

+	rwsem_clear_reader_owned(sem);
 	__up_read(sem);
 }

@@ -181,7 +182,7 @@ void down_read_non_owner(struct rw_semaphore *sem)
 	might_sleep();

 	__down_read(sem);
-	rwsem_set_reader_owned(sem);
+	__rwsem_set_reader_owned(sem, NULL);
 }

 EXPORT_SYMBOL(down_read_non_owner);
@@ -215,7 +216,7 @@ EXPORT_SYMBOL(down_write_killable_nested);

 void up_read_non_owner(struct rw_semaphore *sem)
 {
-	DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);
+	DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
 	__up_read(sem);
 }

diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index b9d0e72aa80f..bad2bca0268b 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,24 +1,30 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * The owner field of the rw_semaphore structure will be set to
- * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear
- * the owner field when it unlocks. A reader, on the other hand, will
- * not touch the owner field when it unlocks.
+ * The least significant 2 bits of the owner value has the following
+ * meanings when set.
+ *  - RWSEM_READER_OWNED (bit 0): The rwsem is owned by readers
+ *  - RWSEM_ANONYMOUSLY_OWNED (bit 1): The rwsem is anonymously owned,
+ *    i.e. the owner(s) cannot be readily determined. It can be reader
+ *    owned or the owning writer is indeterminate.
  *
- * In essence, the owner field now has the following 4 states:
- *  1) 0
- *     - lock is free or the owner hasn't set the field yet
- *  2) RWSEM_READER_OWNED
- *     - lock is currently or previously owned by readers (lock is free
- *       or not set by owner yet)
- *  3) RWSEM_ANONYMOUSLY_OWNED bit set with some other bits set as well
- *     - lock is owned by an anonymous writer, so spinning on the lock
- *       owner should be disabled.
- *  4) Other non-zero value
- *     - a writer owns the lock and other writers can spin on the lock owner.
+ * When a writer acquires a rwsem, it puts its task_struct pointer
+ * into the owner field. It is cleared after an unlock.
+ *
+ * When a reader acquires a rwsem, it will also puts its task_struct
+ * pointer into the owner field with both the RWSEM_READER_OWNED and
+ * RWSEM_ANONYMOUSLY_OWNED bits set. On unlock, the owner field will
+ * largely be left untouched. So for a free or reader-owned rwsem,
+ * the owner value may contain information about the last reader that
+ * acquires the rwsem. The anonymous bit is set because that particular
+ * reader may or may not still own the lock.
+ *
+ * That information may be helpful in debugging cases where the system
+ * seems to hang on a reader owned rwsem especially if only one reader
+ * is involved. Ideally we would like to track all the readers that own
+ * a rwsem, but the overhead is simply too big.
  */
-#define RWSEM_ANONYMOUSLY_OWNED	(1UL << 0)
-#define RWSEM_READER_OWNED	((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED)
+#define RWSEM_READER_OWNED	(1UL << 0)
+#define RWSEM_ANONYMOUSLY_OWNED	(1UL << 1)

 #ifdef CONFIG_DEBUG_RWSEMS
 # define DEBUG_RWSEMS_WARN_ON(c)	DEBUG_LOCKS_WARN_ON(c)
@@ -44,15 +50,26 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem)
 	WRITE_ONCE(sem->owner, NULL);
 }

+/*
+ * The task_struct pointer of the last owning reader will be left in
+ * the owner field.
+ *
+ * Note that the owner value just indicates the task has owned the rwsem
+ * previously, it may not be the real owner or one of the real owners
+ * anymore when that field is examined, so take it with a grain of salt.
+ */
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
+					    struct task_struct *owner)
+{
+	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED
+						 | RWSEM_ANONYMOUSLY_OWNED;
+
+	WRITE_ONCE(sem->owner, (struct task_struct *)val);
+}
+
 static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
 {
-	/*
-	 * We check the owner value first to make sure that we will only
-	 * do a write to the rwsem cacheline when it is really necessary
-	 * to minimize cacheline contention.
-	 */
-	if (READ_ONCE(sem->owner) != RWSEM_READER_OWNED)
-		WRITE_ONCE(sem->owner, RWSEM_READER_OWNED);
+	__rwsem_set_reader_owned(sem, current);
 }

 /*
@@ -72,6 +89,25 @@ static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
 {
 	return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
 }
+
+#ifdef CONFIG_DEBUG_RWSEMS
+/*
+ * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
+ * is a task pointer in owner of a reader-owned rwsem, it will be the
+ * real owner or one of the real owners. The only exception is when the
+ * unlock is done by up_read_non_owner().
+ */
+#define rwsem_clear_reader_owned rwsem_clear_reader_owned
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
+{
+	unsigned long val = (unsigned long)current | RWSEM_READER_OWNED
+						   | RWSEM_ANONYMOUSLY_OWNED;
+	if (READ_ONCE(sem->owner) == (struct task_struct *)val)
+		cmpxchg_relaxed((unsigned long *)&sem->owner, val,
+				RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED);
+}
+#endif
+
 #else
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
 {
@@ -81,7 +117,18 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem)
 {
 }

+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
+					    struct task_struct *owner)
+{
+}
+
 static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
 {
 }
 #endif
+
+#ifndef rwsem_clear_reader_owned
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
+{
+}
+#endif
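
For illustration only (not part of the patch): a small user-space sketch of the owner-word encoding that the new rwsem.h comment describes, using a plain unsigned long and a made-up task address in place of sem->owner.

/*
 * Illustrative sketch: the two low bits of the owner word encode the
 * ownership state, while the remaining bits can carry the task pointer
 * of the last reader, as in __rwsem_set_reader_owned() above.
 */
#include <stdio.h>

#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_ANONYMOUSLY_OWNED	(1UL << 1)

int main(void)
{
	/* Made-up, suitably aligned "task_struct" address for illustration. */
	unsigned long task = 0xffff888012345600UL;

	/* Reader acquisition: task pointer plus both low bits set. */
	unsigned long owner = task | RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED;

	printf("reader owned:      %s\n",
	       (owner & RWSEM_READER_OWNED) ? "yes" : "no");
	printf("anonymously owned: %s\n",
	       (owner & RWSEM_ANONYMOUSLY_OWNED) ? "yes" : "no");
	printf("last reader task:  %#lx\n",
	       owner & ~(RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED));
	return 0;
}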