aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/locking
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/locking')
-rw-r--r--kernel/locking/Makefile1
-rw-r--r--kernel/locking/lockdep_internals.h6
-rw-r--r--kernel/locking/locktorture.c12
-rw-r--r--kernel/locking/qrwlock.c133
-rw-r--r--kernel/locking/rtmutex-debug.h5
-rw-r--r--kernel/locking/rtmutex.c273
-rw-r--r--kernel/locking/rtmutex.h5
-rw-r--r--kernel/locking/rwsem-xadd.c274
-rw-r--r--kernel/locking/rwsem.c31
9 files changed, 664 insertions, 76 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index b8bdcd4785b7..8541bfdfd232 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -24,4 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
24obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o 24obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
25obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o 25obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
26obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o 26obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
27obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
27obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o 28obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index 4f560cfedc8f..51c4b24b6328 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -54,9 +54,9 @@ enum {
54 * table (if it's not there yet), and we check it for lock order 54 * table (if it's not there yet), and we check it for lock order
55 * conflicts and deadlocks. 55 * conflicts and deadlocks.
56 */ 56 */
57#define MAX_LOCKDEP_ENTRIES 16384UL 57#define MAX_LOCKDEP_ENTRIES 32768UL
58 58
59#define MAX_LOCKDEP_CHAINS_BITS 15 59#define MAX_LOCKDEP_CHAINS_BITS 16
60#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) 60#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
61 61
62#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) 62#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
@@ -65,7 +65,7 @@ enum {
65 * Stack-trace: tightly packed array of stack backtrace 65 * Stack-trace: tightly packed array of stack backtrace
66 * addresses. Protected by the hash_lock. 66 * addresses. Protected by the hash_lock.
67 */ 67 */
68#define MAX_STACK_TRACE_ENTRIES 262144UL 68#define MAX_STACK_TRACE_ENTRIES 524288UL
69 69
70extern struct list_head all_lock_classes; 70extern struct list_head all_lock_classes;
71extern struct lock_chain lock_chains[]; 71extern struct lock_chain lock_chains[];
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index f26b1a18e34e..0955b885d0dc 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -82,14 +82,14 @@ struct lock_writer_stress_stats {
82}; 82};
83static struct lock_writer_stress_stats *lwsa; 83static struct lock_writer_stress_stats *lwsa;
84 84
85#if defined(MODULE) || defined(CONFIG_LOCK_TORTURE_TEST_RUNNABLE) 85#if defined(MODULE)
86#define LOCKTORTURE_RUNNABLE_INIT 1 86#define LOCKTORTURE_RUNNABLE_INIT 1
87#else 87#else
88#define LOCKTORTURE_RUNNABLE_INIT 0 88#define LOCKTORTURE_RUNNABLE_INIT 0
89#endif 89#endif
90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; 90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT;
91module_param(locktorture_runnable, int, 0444); 91module_param(locktorture_runnable, int, 0444);
92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at boot"); 92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at module init");
93 93
94/* Forward reference. */ 94/* Forward reference. */
95static void lock_torture_cleanup(void); 95static void lock_torture_cleanup(void);
@@ -216,10 +216,11 @@ static int lock_torture_writer(void *arg)
216 static DEFINE_TORTURE_RANDOM(rand); 216 static DEFINE_TORTURE_RANDOM(rand);
217 217
218 VERBOSE_TOROUT_STRING("lock_torture_writer task started"); 218 VERBOSE_TOROUT_STRING("lock_torture_writer task started");
219 set_user_nice(current, 19); 219 set_user_nice(current, MAX_NICE);
220 220
221 do { 221 do {
222 schedule_timeout_uninterruptible(1); 222 if ((torture_random(&rand) & 0xfffff) == 0)
223 schedule_timeout_uninterruptible(1);
223 cur_ops->writelock(); 224 cur_ops->writelock();
224 if (WARN_ON_ONCE(lock_is_write_held)) 225 if (WARN_ON_ONCE(lock_is_write_held))
225 lwsp->n_write_lock_fail++; 226 lwsp->n_write_lock_fail++;
@@ -354,7 +355,8 @@ static int __init lock_torture_init(void)
354 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, 355 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops,
355 }; 356 };
356 357
357 torture_init_begin(torture_type, verbose, &locktorture_runnable); 358 if (!torture_init_begin(torture_type, verbose, &locktorture_runnable))
359 return -EBUSY;
358 360
359 /* Process args and tell the world that the torturer is on the job. */ 361 /* Process args and tell the world that the torturer is on the job. */
360 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 362 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
new file mode 100644
index 000000000000..fb5b8ac411a5
--- /dev/null
+++ b/kernel/locking/qrwlock.c
@@ -0,0 +1,133 @@
1/*
2 * Queue read/write lock
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
15 *
16 * Authors: Waiman Long <waiman.long@hp.com>
17 */
18#include <linux/smp.h>
19#include <linux/bug.h>
20#include <linux/cpumask.h>
21#include <linux/percpu.h>
22#include <linux/hardirq.h>
23#include <linux/mutex.h>
24#include <asm/qrwlock.h>
25
26/**
27 * rspin_until_writer_unlock - inc reader count & spin until writer is gone
28 * @lock : Pointer to queue rwlock structure
29 * @writer: Current queue rwlock writer status byte
30 *
31 * In interrupt context or at the head of the queue, the reader will just
32 * increment the reader count & wait until the writer releases the lock.
33 */
34static __always_inline void
35rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
36{
37 while ((cnts & _QW_WMASK) == _QW_LOCKED) {
38 arch_mutex_cpu_relax();
39 cnts = smp_load_acquire((u32 *)&lock->cnts);
40 }
41}
42
43/**
44 * queue_read_lock_slowpath - acquire read lock of a queue rwlock
45 * @lock: Pointer to queue rwlock structure
46 */
47void queue_read_lock_slowpath(struct qrwlock *lock)
48{
49 u32 cnts;
50
51 /*
52 * Readers come here when they cannot get the lock without waiting
53 */
54 if (unlikely(in_interrupt())) {
55 /*
56 * Readers in interrupt context will spin until the lock is
57 * available without waiting in the queue.
58 */
59 cnts = smp_load_acquire((u32 *)&lock->cnts);
60 rspin_until_writer_unlock(lock, cnts);
61 return;
62 }
63 atomic_sub(_QR_BIAS, &lock->cnts);
64
65 /*
66 * Put the reader into the wait queue
67 */
68 arch_spin_lock(&lock->lock);
69
70 /*
71 * At the head of the wait queue now, wait until the writer state
72 * goes to 0 and then try to increment the reader count and get
73 * the lock. It is possible that an incoming writer may steal the
74 * lock in the interim, so it is necessary to check the writer byte
75 * to make sure that the write lock isn't taken.
76 */
77 while (atomic_read(&lock->cnts) & _QW_WMASK)
78 arch_mutex_cpu_relax();
79
80 cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
81 rspin_until_writer_unlock(lock, cnts);
82
83 /*
84 * Signal the next one in queue to become queue head
85 */
86 arch_spin_unlock(&lock->lock);
87}
88EXPORT_SYMBOL(queue_read_lock_slowpath);
89
90/**
91 * queue_write_lock_slowpath - acquire write lock of a queue rwlock
92 * @lock : Pointer to queue rwlock structure
93 */
94void queue_write_lock_slowpath(struct qrwlock *lock)
95{
96 u32 cnts;
97
98 /* Put the writer into the wait queue */
99 arch_spin_lock(&lock->lock);
100
101 /* Try to acquire the lock directly if no reader is present */
102 if (!atomic_read(&lock->cnts) &&
103 (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
104 goto unlock;
105
106 /*
107 * Set the waiting flag to notify readers that a writer is pending,
108 * or wait for a previous writer to go away.
109 */
110 for (;;) {
111 cnts = atomic_read(&lock->cnts);
112 if (!(cnts & _QW_WMASK) &&
113 (atomic_cmpxchg(&lock->cnts, cnts,
114 cnts | _QW_WAITING) == cnts))
115 break;
116
117 arch_mutex_cpu_relax();
118 }
119
120 /* When no more readers, set the locked flag */
121 for (;;) {
122 cnts = atomic_read(&lock->cnts);
123 if ((cnts == _QW_WAITING) &&
124 (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
125 _QW_LOCKED) == _QW_WAITING))
126 break;
127
128 arch_mutex_cpu_relax();
129 }
130unlock:
131 arch_spin_unlock(&lock->lock);
132}
133EXPORT_SYMBOL(queue_write_lock_slowpath);
diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h
index 14193d596d78..ab29b6a22669 100644
--- a/kernel/locking/rtmutex-debug.h
+++ b/kernel/locking/rtmutex-debug.h
@@ -31,3 +31,8 @@ static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
31{ 31{
32 return (waiter != NULL); 32 return (waiter != NULL);
33} 33}
34
35static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
36{
37 debug_rt_mutex_print_deadlock(w);
38}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index aa4dff04b594..fc605941b9b8 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -83,6 +83,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
83 owner = *p; 83 owner = *p;
84 } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); 84 } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
85} 85}
86
87/*
88 * Safe fastpath aware unlock:
89 * 1) Clear the waiters bit
90 * 2) Drop lock->wait_lock
91 * 3) Try to unlock the lock with cmpxchg
92 */
93static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
94 __releases(lock->wait_lock)
95{
96 struct task_struct *owner = rt_mutex_owner(lock);
97
98 clear_rt_mutex_waiters(lock);
99 raw_spin_unlock(&lock->wait_lock);
100 /*
101 * If a new waiter comes in between the unlock and the cmpxchg
102 * we have two situations:
103 *
104 * unlock(wait_lock);
105 * lock(wait_lock);
106 * cmpxchg(p, owner, 0) == owner
107 * mark_rt_mutex_waiters(lock);
108 * acquire(lock);
109 * or:
110 *
111 * unlock(wait_lock);
112 * lock(wait_lock);
113 * mark_rt_mutex_waiters(lock);
114 *
115 * cmpxchg(p, owner, 0) != owner
116 * enqueue_waiter();
117 * unlock(wait_lock);
118 * lock(wait_lock);
119 * wake waiter();
120 * unlock(wait_lock);
121 * lock(wait_lock);
122 * acquire(lock);
123 */
124 return rt_mutex_cmpxchg(lock, owner, NULL);
125}
126
86#else 127#else
87# define rt_mutex_cmpxchg(l,c,n) (0) 128# define rt_mutex_cmpxchg(l,c,n) (0)
88static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 129static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
@@ -90,6 +131,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
90 lock->owner = (struct task_struct *) 131 lock->owner = (struct task_struct *)
91 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); 132 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
92} 133}
134
135/*
136 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
137 */
138static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
139 __releases(lock->wait_lock)
140{
141 lock->owner = NULL;
142 raw_spin_unlock(&lock->wait_lock);
143 return true;
144}
93#endif 145#endif
94 146
95static inline int 147static inline int
@@ -260,27 +312,36 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
260 */ 312 */
261int max_lock_depth = 1024; 313int max_lock_depth = 1024;
262 314
315static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
316{
317 return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
318}
319
263/* 320/*
264 * Adjust the priority chain. Also used for deadlock detection. 321 * Adjust the priority chain. Also used for deadlock detection.
265 * Decreases task's usage by one - may thus free the task. 322 * Decreases task's usage by one - may thus free the task.
266 * 323 *
267 * @task: the task owning the mutex (owner) for which a chain walk is probably 324 * @task: the task owning the mutex (owner) for which a chain walk is
268 * needed 325 * probably needed
269 * @deadlock_detect: do we have to carry out deadlock detection? 326 * @deadlock_detect: do we have to carry out deadlock detection?
270 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 327 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
271 * things for a task that has just got its priority adjusted, and 328 * things for a task that has just got its priority adjusted, and
272 * is waiting on a mutex) 329 * is waiting on a mutex)
330 * @next_lock: the mutex on which the owner of @orig_lock was blocked before
331 * we dropped its pi_lock. Is never dereferenced, only used for
332 * comparison to detect lock chain changes.
273 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated 333 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
274 * its priority to the mutex owner (can be NULL in the case 334 * its priority to the mutex owner (can be NULL in the case
275 * depicted above or if the top waiter is gone away and we are 335 * depicted above or if the top waiter is gone away and we are
276 * actually deboosting the owner) 336 * actually deboosting the owner)
277 * @top_task: the current top waiter 337 * @top_task: the current top waiter
278 * 338 *
279 * Returns 0 or -EDEADLK. 339 * Returns 0 or -EDEADLK.
280 */ 340 */
281static int rt_mutex_adjust_prio_chain(struct task_struct *task, 341static int rt_mutex_adjust_prio_chain(struct task_struct *task,
282 int deadlock_detect, 342 int deadlock_detect,
283 struct rt_mutex *orig_lock, 343 struct rt_mutex *orig_lock,
344 struct rt_mutex *next_lock,
284 struct rt_mutex_waiter *orig_waiter, 345 struct rt_mutex_waiter *orig_waiter,
285 struct task_struct *top_task) 346 struct task_struct *top_task)
286{ 347{
@@ -314,7 +375,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
314 } 375 }
315 put_task_struct(task); 376 put_task_struct(task);
316 377
317 return deadlock_detect ? -EDEADLK : 0; 378 return -EDEADLK;
318 } 379 }
319 retry: 380 retry:
320 /* 381 /*
@@ -339,13 +400,32 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
339 goto out_unlock_pi; 400 goto out_unlock_pi;
340 401
341 /* 402 /*
403 * We dropped all locks after taking a refcount on @task, so
404 * the task might have moved on in the lock chain or even left
405 * the chain completely and blocks now on an unrelated lock or
406 * on @orig_lock.
407 *
408 * We stored the lock on which @task was blocked in @next_lock,
409 * so we can detect the chain change.
410 */
411 if (next_lock != waiter->lock)
412 goto out_unlock_pi;
413
414 /*
342 * Drop out, when the task has no waiters. Note, 415 * Drop out, when the task has no waiters. Note,
343 * top_waiter can be NULL, when we are in the deboosting 416 * top_waiter can be NULL, when we are in the deboosting
344 * mode! 417 * mode!
345 */ 418 */
346 if (top_waiter && (!task_has_pi_waiters(task) || 419 if (top_waiter) {
347 top_waiter != task_top_pi_waiter(task))) 420 if (!task_has_pi_waiters(task))
348 goto out_unlock_pi; 421 goto out_unlock_pi;
422 /*
423 * If deadlock detection is off, we stop here if we
424 * are not the top pi waiter of the task.
425 */
426 if (!detect_deadlock && top_waiter != task_top_pi_waiter(task))
427 goto out_unlock_pi;
428 }
349 429
350 /* 430 /*
351 * When deadlock detection is off then we check, if further 431 * When deadlock detection is off then we check, if further
@@ -361,11 +441,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
361 goto retry; 441 goto retry;
362 } 442 }
363 443
364 /* Deadlock detection */ 444 /*
445 * Deadlock detection. If the lock is the same as the original
446 * lock which caused us to walk the lock chain or if the
447 * current lock is owned by the task which initiated the chain
448 * walk, we detected a deadlock.
449 */
365 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 450 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
366 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); 451 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
367 raw_spin_unlock(&lock->wait_lock); 452 raw_spin_unlock(&lock->wait_lock);
368 ret = deadlock_detect ? -EDEADLK : 0; 453 ret = -EDEADLK;
369 goto out_unlock_pi; 454 goto out_unlock_pi;
370 } 455 }
371 456
@@ -410,11 +495,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
410 __rt_mutex_adjust_prio(task); 495 __rt_mutex_adjust_prio(task);
411 } 496 }
412 497
498 /*
499 * Check whether the task which owns the current lock is pi
500 * blocked itself. If yes we store a pointer to the lock for
501 * the lock chain change detection above. After we dropped
502 * task->pi_lock next_lock cannot be dereferenced anymore.
503 */
504 next_lock = task_blocked_on_lock(task);
505
413 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 506 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
414 507
415 top_waiter = rt_mutex_top_waiter(lock); 508 top_waiter = rt_mutex_top_waiter(lock);
416 raw_spin_unlock(&lock->wait_lock); 509 raw_spin_unlock(&lock->wait_lock);
417 510
511 /*
512 * We reached the end of the lock chain. Stop right here. No
513 * point to go back just to figure that out.
514 */
515 if (!next_lock)
516 goto out_put_task;
517
418 if (!detect_deadlock && waiter != top_waiter) 518 if (!detect_deadlock && waiter != top_waiter)
419 goto out_put_task; 519 goto out_put_task;
420 520
@@ -524,8 +624,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
524{ 624{
525 struct task_struct *owner = rt_mutex_owner(lock); 625 struct task_struct *owner = rt_mutex_owner(lock);
526 struct rt_mutex_waiter *top_waiter = waiter; 626 struct rt_mutex_waiter *top_waiter = waiter;
527 unsigned long flags; 627 struct rt_mutex *next_lock;
528 int chain_walk = 0, res; 628 int chain_walk = 0, res;
629 unsigned long flags;
630
631 /*
632 * Early deadlock detection. We really don't want the task to
633 * enqueue on itself just to untangle the mess later. It's not
634 * only an optimization. We drop the locks, so another waiter
635 * can come in before the chain walk detects the deadlock. So
636 * the other will detect the deadlock and return -EDEADLOCK,
637 * which is wrong, as the other waiter is not in a deadlock
638 * situation.
639 */
640 if (owner == task)
641 return -EDEADLK;
529 642
530 raw_spin_lock_irqsave(&task->pi_lock, flags); 643 raw_spin_lock_irqsave(&task->pi_lock, flags);
531 __rt_mutex_adjust_prio(task); 644 __rt_mutex_adjust_prio(task);
@@ -545,20 +658,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
545 if (!owner) 658 if (!owner)
546 return 0; 659 return 0;
547 660
661 raw_spin_lock_irqsave(&owner->pi_lock, flags);
548 if (waiter == rt_mutex_top_waiter(lock)) { 662 if (waiter == rt_mutex_top_waiter(lock)) {
549 raw_spin_lock_irqsave(&owner->pi_lock, flags);
550 rt_mutex_dequeue_pi(owner, top_waiter); 663 rt_mutex_dequeue_pi(owner, top_waiter);
551 rt_mutex_enqueue_pi(owner, waiter); 664 rt_mutex_enqueue_pi(owner, waiter);
552 665
553 __rt_mutex_adjust_prio(owner); 666 __rt_mutex_adjust_prio(owner);
554 if (owner->pi_blocked_on) 667 if (owner->pi_blocked_on)
555 chain_walk = 1; 668 chain_walk = 1;
556 raw_spin_unlock_irqrestore(&owner->pi_lock, flags); 669 } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
557 }
558 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
559 chain_walk = 1; 670 chain_walk = 1;
671 }
672
673 /* Store the lock on which owner is blocked or NULL */
674 next_lock = task_blocked_on_lock(owner);
560 675
561 if (!chain_walk) 676 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
677 /*
678 * Even if full deadlock detection is on, if the owner is not
679 * blocked itself, we can avoid finding this out in the chain
680 * walk.
681 */
682 if (!chain_walk || !next_lock)
562 return 0; 683 return 0;
563 684
564 /* 685 /*
@@ -570,8 +691,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
570 691
571 raw_spin_unlock(&lock->wait_lock); 692 raw_spin_unlock(&lock->wait_lock);
572 693
573 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 694 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
574 task); 695 next_lock, waiter, task);
575 696
576 raw_spin_lock(&lock->wait_lock); 697 raw_spin_lock(&lock->wait_lock);
577 698
@@ -581,7 +702,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
581/* 702/*
582 * Wake up the next waiter on the lock. 703 * Wake up the next waiter on the lock.
583 * 704 *
584 * Remove the top waiter from the current tasks waiter list and wake it up. 705 * Remove the top waiter from the current tasks pi waiter list and
706 * wake it up.
585 * 707 *
586 * Called with lock->wait_lock held. 708 * Called with lock->wait_lock held.
587 */ 709 */
@@ -602,10 +724,23 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
602 */ 724 */
603 rt_mutex_dequeue_pi(current, waiter); 725 rt_mutex_dequeue_pi(current, waiter);
604 726
605 rt_mutex_set_owner(lock, NULL); 727 /*
728 * As we are waking up the top waiter, and the waiter stays
729 * queued on the lock until it gets the lock, this lock
730 * obviously has waiters. Just set the bit here and this has
731 * the added benefit of forcing all new tasks into the
732 * slow path making sure no task of lower priority than
733 * the top waiter can steal this lock.
734 */
735 lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
606 736
607 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 737 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
608 738
739 /*
740 * It's safe to dereference waiter as it cannot go away as
741 * long as we hold lock->wait_lock. The waiter task needs to
742 * acquire it in order to dequeue the waiter.
743 */
609 wake_up_process(waiter->task); 744 wake_up_process(waiter->task);
610} 745}
611 746
@@ -620,8 +755,8 @@ static void remove_waiter(struct rt_mutex *lock,
620{ 755{
621 int first = (waiter == rt_mutex_top_waiter(lock)); 756 int first = (waiter == rt_mutex_top_waiter(lock));
622 struct task_struct *owner = rt_mutex_owner(lock); 757 struct task_struct *owner = rt_mutex_owner(lock);
758 struct rt_mutex *next_lock = NULL;
623 unsigned long flags; 759 unsigned long flags;
624 int chain_walk = 0;
625 760
626 raw_spin_lock_irqsave(&current->pi_lock, flags); 761 raw_spin_lock_irqsave(&current->pi_lock, flags);
627 rt_mutex_dequeue(lock, waiter); 762 rt_mutex_dequeue(lock, waiter);
@@ -645,13 +780,13 @@ static void remove_waiter(struct rt_mutex *lock,
645 } 780 }
646 __rt_mutex_adjust_prio(owner); 781 __rt_mutex_adjust_prio(owner);
647 782
648 if (owner->pi_blocked_on) 783 /* Store the lock on which owner is blocked or NULL */
649 chain_walk = 1; 784 next_lock = task_blocked_on_lock(owner);
650 785
651 raw_spin_unlock_irqrestore(&owner->pi_lock, flags); 786 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
652 } 787 }
653 788
654 if (!chain_walk) 789 if (!next_lock)
655 return; 790 return;
656 791
657 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 792 /* gets dropped in rt_mutex_adjust_prio_chain()! */
@@ -659,7 +794,7 @@ static void remove_waiter(struct rt_mutex *lock,
659 794
660 raw_spin_unlock(&lock->wait_lock); 795 raw_spin_unlock(&lock->wait_lock);
661 796
662 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); 797 rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);
663 798
664 raw_spin_lock(&lock->wait_lock); 799 raw_spin_lock(&lock->wait_lock);
665} 800}
@@ -672,6 +807,7 @@ static void remove_waiter(struct rt_mutex *lock,
672void rt_mutex_adjust_pi(struct task_struct *task) 807void rt_mutex_adjust_pi(struct task_struct *task)
673{ 808{
674 struct rt_mutex_waiter *waiter; 809 struct rt_mutex_waiter *waiter;
810 struct rt_mutex *next_lock;
675 unsigned long flags; 811 unsigned long flags;
676 812
677 raw_spin_lock_irqsave(&task->pi_lock, flags); 813 raw_spin_lock_irqsave(&task->pi_lock, flags);
@@ -682,12 +818,13 @@ void rt_mutex_adjust_pi(struct task_struct *task)
682 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 818 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
683 return; 819 return;
684 } 820 }
685 821 next_lock = waiter->lock;
686 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 822 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
687 823
688 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 824 /* gets dropped in rt_mutex_adjust_prio_chain()! */
689 get_task_struct(task); 825 get_task_struct(task);
690 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); 826
827 rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
691} 828}
692 829
693/** 830/**
@@ -739,6 +876,26 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
739 return ret; 876 return ret;
740} 877}
741 878
879static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
880 struct rt_mutex_waiter *w)
881{
882 /*
883 * If the result is not -EDEADLOCK or the caller requested
884 * deadlock detection, nothing to do here.
885 */
886 if (res != -EDEADLOCK || detect_deadlock)
887 return;
888
889 /*
890 * Yell lowdly and stop the task right here.
891 */
892 rt_mutex_print_deadlock(w);
893 while (1) {
894 set_current_state(TASK_INTERRUPTIBLE);
895 schedule();
896 }
897}
898
742/* 899/*
743 * Slow path lock function: 900 * Slow path lock function:
744 */ 901 */
@@ -778,8 +935,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
778 935
779 set_current_state(TASK_RUNNING); 936 set_current_state(TASK_RUNNING);
780 937
781 if (unlikely(ret)) 938 if (unlikely(ret)) {
782 remove_waiter(lock, &waiter); 939 remove_waiter(lock, &waiter);
940 rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter);
941 }
783 942
784 /* 943 /*
785 * try_to_take_rt_mutex() sets the waiter bit 944 * try_to_take_rt_mutex() sets the waiter bit
@@ -835,12 +994,49 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
835 994
836 rt_mutex_deadlock_account_unlock(current); 995 rt_mutex_deadlock_account_unlock(current);
837 996
838 if (!rt_mutex_has_waiters(lock)) { 997 /*
839 lock->owner = NULL; 998 * We must be careful here if the fast path is enabled. If we
840 raw_spin_unlock(&lock->wait_lock); 999 * have no waiters queued we cannot set owner to NULL here
841 return; 1000 * because of:
1001 *
1002 * foo->lock->owner = NULL;
1003 * rtmutex_lock(foo->lock); <- fast path
1004 * free = atomic_dec_and_test(foo->refcnt);
1005 * rtmutex_unlock(foo->lock); <- fast path
1006 * if (free)
1007 * kfree(foo);
1008 * raw_spin_unlock(foo->lock->wait_lock);
1009 *
1010 * So for the fastpath enabled kernel:
1011 *
1012 * Nothing can set the waiters bit as long as we hold
1013 * lock->wait_lock. So we do the following sequence:
1014 *
1015 * owner = rt_mutex_owner(lock);
1016 * clear_rt_mutex_waiters(lock);
1017 * raw_spin_unlock(&lock->wait_lock);
1018 * if (cmpxchg(&lock->owner, owner, 0) == owner)
1019 * return;
1020 * goto retry;
1021 *
1022 * The fastpath disabled variant is simple as all access to
1023 * lock->owner is serialized by lock->wait_lock:
1024 *
1025 * lock->owner = NULL;
1026 * raw_spin_unlock(&lock->wait_lock);
1027 */
1028 while (!rt_mutex_has_waiters(lock)) {
1029 /* Drops lock->wait_lock ! */
1030 if (unlock_rt_mutex_safe(lock) == true)
1031 return;
1032 /* Relock the rtmutex and try again */
1033 raw_spin_lock(&lock->wait_lock);
842 } 1034 }
843 1035
1036 /*
1037 * The wakeup next waiter path does not suffer from the above
1038 * race. See the comments there.
1039 */
844 wakeup_next_waiter(lock); 1040 wakeup_next_waiter(lock);
845 1041
846 raw_spin_unlock(&lock->wait_lock); 1042 raw_spin_unlock(&lock->wait_lock);
@@ -1088,7 +1284,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1088 return 1; 1284 return 1;
1089 } 1285 }
1090 1286
1091 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 1287 /* We enforce deadlock detection for futexes */
1288 ret = task_blocks_on_rt_mutex(lock, waiter, task, 1);
1092 1289
1093 if (ret && !rt_mutex_owner(lock)) { 1290 if (ret && !rt_mutex_owner(lock)) {
1094 /* 1291 /*
diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h
index a1a1dd06421d..f6a1f3c133b1 100644
--- a/kernel/locking/rtmutex.h
+++ b/kernel/locking/rtmutex.h
@@ -24,3 +24,8 @@
24#define debug_rt_mutex_print_deadlock(w) do { } while (0) 24#define debug_rt_mutex_print_deadlock(w) do { } while (0)
25#define debug_rt_mutex_detect_deadlock(w,d) (d) 25#define debug_rt_mutex_detect_deadlock(w,d) (d)
26#define debug_rt_mutex_reset_waiter(w) do { } while (0) 26#define debug_rt_mutex_reset_waiter(w) do { } while (0)
27
28static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
29{
30 WARN(1, "rtmutex deadlock detected\n");
31}
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 1d66e08e897d..dacc32142fcc 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -5,11 +5,66 @@
5 * 5 *
6 * Writer lock-stealing by Alex Shi <alex.shi@intel.com> 6 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
7 * and Michel Lespinasse <walken@google.com> 7 * and Michel Lespinasse <walken@google.com>
8 *
9 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
10 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
8 */ 11 */
9#include <linux/rwsem.h> 12#include <linux/rwsem.h>
10#include <linux/sched.h> 13#include <linux/sched.h>
11#include <linux/init.h> 14#include <linux/init.h>
12#include <linux/export.h> 15#include <linux/export.h>
16#include <linux/sched/rt.h>
17
18#include "mcs_spinlock.h"
19
20/*
21 * Guide to the rw_semaphore's count field for common values.
22 * (32-bit case illustrated, similar for 64-bit)
23 *
24 * 0x0000000X (1) X readers active or attempting lock, no writer waiting
25 * X = #active_readers + #readers attempting to lock
26 * (X*ACTIVE_BIAS)
27 *
28 * 0x00000000 rwsem is unlocked, and no one is waiting for the lock or
29 * attempting to read lock or write lock.
30 *
31 * 0xffff000X (1) X readers active or attempting lock, with waiters for lock
32 * X = #active readers + # readers attempting lock
33 * (X*ACTIVE_BIAS + WAITING_BIAS)
34 * (2) 1 writer attempting lock, no waiters for lock
35 * X-1 = #active readers + #readers attempting lock
36 * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
37 * (3) 1 writer active, no waiters for lock
38 * X-1 = #active readers + #readers attempting lock
39 * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
40 *
41 * 0xffff0001 (1) 1 reader active or attempting lock, waiters for lock
42 * (WAITING_BIAS + ACTIVE_BIAS)
43 * (2) 1 writer active or attempting lock, no waiters for lock
44 * (ACTIVE_WRITE_BIAS)
45 *
46 * 0xffff0000 (1) There are writers or readers queued but none active
47 * or in the process of attempting lock.
48 * (WAITING_BIAS)
49 * Note: writer can attempt to steal lock for this count by adding
50 * ACTIVE_WRITE_BIAS in cmpxchg and checking the old count
51 *
52 * 0xfffe0001 (1) 1 writer active, or attempting lock. Waiters on queue.
53 * (ACTIVE_WRITE_BIAS + WAITING_BIAS)
54 *
55 * Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
56 * the count becomes more than 0 for successful lock acquisition,
57 * i.e. the case where there are only readers or nobody has lock.
58 * (1st and 2nd case above).
59 *
60 * Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
61 * checking the count becomes ACTIVE_WRITE_BIAS for successful lock
62 * acquisition (i.e. nobody else has lock or attempts lock). If
63 * unsuccessful, in rwsem_down_write_failed, we'll check to see if there
64 * are only waiters but none active (5th case above), and attempt to
65 * steal the lock.
66 *
67 */
13 68
14/* 69/*
15 * Initialize an rwsem: 70 * Initialize an rwsem:
@@ -27,6 +82,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
27 sem->count = RWSEM_UNLOCKED_VALUE; 82 sem->count = RWSEM_UNLOCKED_VALUE;
28 raw_spin_lock_init(&sem->wait_lock); 83 raw_spin_lock_init(&sem->wait_lock);
29 INIT_LIST_HEAD(&sem->wait_list); 84 INIT_LIST_HEAD(&sem->wait_list);
85#ifdef CONFIG_SMP
86 sem->owner = NULL;
87 sem->osq = NULL;
88#endif
30} 89}
31 90
32EXPORT_SYMBOL(__init_rwsem); 91EXPORT_SYMBOL(__init_rwsem);
@@ -141,7 +200,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
141} 200}
142 201
143/* 202/*
144 * wait for the read lock to be granted 203 * Wait for the read lock to be granted
145 */ 204 */
146__visible 205__visible
147struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) 206struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
@@ -188,64 +247,221 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
188 return sem; 247 return sem;
189} 248}
190 249
250static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
251{
252 if (!(count & RWSEM_ACTIVE_MASK)) {
253 /* try acquiring the write lock */
254 if (sem->count == RWSEM_WAITING_BIAS &&
255 cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
256 RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
257 if (!list_is_singular(&sem->wait_list))
258 rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
259 return true;
260 }
261 }
262 return false;
263}
264
265#ifdef CONFIG_SMP
191/* 266/*
192 * wait until we successfully acquire the write lock 267 * Try to acquire write lock before the writer has been put on wait queue.
268 */
269static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
270{
271 long old, count = ACCESS_ONCE(sem->count);
272
273 while (true) {
274 if (!(count == 0 || count == RWSEM_WAITING_BIAS))
275 return false;
276
277 old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
278 if (old == count)
279 return true;
280
281 count = old;
282 }
283}
284
285static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
286{
287 struct task_struct *owner;
288 bool on_cpu = true;
289
290 if (need_resched())
291 return 0;
292
293 rcu_read_lock();
294 owner = ACCESS_ONCE(sem->owner);
295 if (owner)
296 on_cpu = owner->on_cpu;
297 rcu_read_unlock();
298
299 /*
300 * If sem->owner is not set, the rwsem owner may have
301 * just acquired it and not set the owner yet or the rwsem
302 * has been released.
303 */
304 return on_cpu;
305}
306
307static inline bool owner_running(struct rw_semaphore *sem,
308 struct task_struct *owner)
309{
310 if (sem->owner != owner)
311 return false;
312
313 /*
314 * Ensure we emit the owner->on_cpu, dereference _after_ checking
315 * sem->owner still matches owner, if that fails, owner might
316 * point to free()d memory, if it still matches, the rcu_read_lock()
317 * ensures the memory stays valid.
318 */
319 barrier();
320
321 return owner->on_cpu;
322}
323
324static noinline
325bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
326{
327 rcu_read_lock();
328 while (owner_running(sem, owner)) {
329 if (need_resched())
330 break;
331
332 arch_mutex_cpu_relax();
333 }
334 rcu_read_unlock();
335
336 /*
337 * We break out the loop above on need_resched() or when the
338 * owner changed, which is a sign for heavy contention. Return
339 * success only when sem->owner is NULL.
340 */
341 return sem->owner == NULL;
342}
343
344static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
345{
346 struct task_struct *owner;
347 bool taken = false;
348
349 preempt_disable();
350
351 /* sem->wait_lock should not be held when doing optimistic spinning */
352 if (!rwsem_can_spin_on_owner(sem))
353 goto done;
354
355 if (!osq_lock(&sem->osq))
356 goto done;
357
358 while (true) {
359 owner = ACCESS_ONCE(sem->owner);
360 if (owner && !rwsem_spin_on_owner(sem, owner))
361 break;
362
363 /* wait_lock will be acquired if write_lock is obtained */
364 if (rwsem_try_write_lock_unqueued(sem)) {
365 taken = true;
366 break;
367 }
368
369 /*
370 * When there's no owner, we might have preempted between the
371 * owner acquiring the lock and setting the owner field. If
372 * we're an RT task that will live-lock because we won't let
373 * the owner complete.
374 */
375 if (!owner && (need_resched() || rt_task(current)))
376 break;
377
378 /*
379 * The cpu_relax() call is a compiler barrier which forces
380 * everything in this loop to be re-loaded. We don't need
381 * memory barriers as we'll eventually observe the right
382 * values at the cost of a few extra spins.
383 */
384 arch_mutex_cpu_relax();
385 }
386 osq_unlock(&sem->osq);
387done:
388 preempt_enable();
389 return taken;
390}
391
392#else
393static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
394{
395 return false;
396}
397#endif
398
399/*
400 * Wait until we successfully acquire the write lock
193 */ 401 */
194__visible 402__visible
195struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) 403struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
196{ 404{
197 long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; 405 long count;
406 bool waiting = true; /* any queued threads before us */
198 struct rwsem_waiter waiter; 407 struct rwsem_waiter waiter;
199 struct task_struct *tsk = current;
200 408
201 /* set up my own style of waitqueue */ 409 /* undo write bias from down_write operation, stop active locking */
202 waiter.task = tsk; 410 count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
411
412 /* do optimistic spinning and steal lock if possible */
413 if (rwsem_optimistic_spin(sem))
414 return sem;
415
416 /*
417 * Optimistic spinning failed, proceed to the slowpath
418 * and block until we can acquire the sem.
419 */
420 waiter.task = current;
203 waiter.type = RWSEM_WAITING_FOR_WRITE; 421 waiter.type = RWSEM_WAITING_FOR_WRITE;
204 422
205 raw_spin_lock_irq(&sem->wait_lock); 423 raw_spin_lock_irq(&sem->wait_lock);
424
425 /* account for this before adding a new element to the list */
206 if (list_empty(&sem->wait_list)) 426 if (list_empty(&sem->wait_list))
207 adjustment += RWSEM_WAITING_BIAS; 427 waiting = false;
428
208 list_add_tail(&waiter.list, &sem->wait_list); 429 list_add_tail(&waiter.list, &sem->wait_list);
209 430
210 /* we're now waiting on the lock, but no longer actively locking */ 431 /* we're now waiting on the lock, but no longer actively locking */
211 count = rwsem_atomic_update(adjustment, sem); 432 if (waiting) {
433 count = ACCESS_ONCE(sem->count);
434
435 /*
436 * If there were already threads queued before us and there are
437 * no active writers, the lock must be read owned; so we try to
438 * wake any read locks that were queued ahead of us.
439 */
440 if (count > RWSEM_WAITING_BIAS)
441 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
212 442
213 /* If there were already threads queued before us and there are no 443 } else
214 * active writers, the lock must be read owned; so we try to wake 444 count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
215 * any read locks that were queued ahead of us. */
216 if (count > RWSEM_WAITING_BIAS &&
217 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
218 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
219 445
220 /* wait until we successfully acquire the lock */ 446 /* wait until we successfully acquire the lock */
221 set_task_state(tsk, TASK_UNINTERRUPTIBLE); 447 set_current_state(TASK_UNINTERRUPTIBLE);
222 while (true) { 448 while (true) {
223 if (!(count & RWSEM_ACTIVE_MASK)) { 449 if (rwsem_try_write_lock(count, sem))
224 /* Try acquiring the write lock. */ 450 break;
225 count = RWSEM_ACTIVE_WRITE_BIAS;
226 if (!list_is_singular(&sem->wait_list))
227 count += RWSEM_WAITING_BIAS;
228
229 if (sem->count == RWSEM_WAITING_BIAS &&
230 cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
231 RWSEM_WAITING_BIAS)
232 break;
233 }
234
235 raw_spin_unlock_irq(&sem->wait_lock); 451 raw_spin_unlock_irq(&sem->wait_lock);
236 452
237 /* Block until there are no active lockers. */ 453 /* Block until there are no active lockers. */
238 do { 454 do {
239 schedule(); 455 schedule();
240 set_task_state(tsk, TASK_UNINTERRUPTIBLE); 456 set_current_state(TASK_UNINTERRUPTIBLE);
241 } while ((count = sem->count) & RWSEM_ACTIVE_MASK); 457 } while ((count = sem->count) & RWSEM_ACTIVE_MASK);
242 458
243 raw_spin_lock_irq(&sem->wait_lock); 459 raw_spin_lock_irq(&sem->wait_lock);
244 } 460 }
461 __set_current_state(TASK_RUNNING);
245 462
246 list_del(&waiter.list); 463 list_del(&waiter.list);
247 raw_spin_unlock_irq(&sem->wait_lock); 464 raw_spin_unlock_irq(&sem->wait_lock);
248 tsk->state = TASK_RUNNING;
249 465
250 return sem; 466 return sem;
251} 467}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index cfff1435bdfb..42f806de49d4 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -12,6 +12,27 @@
12 12
13#include <linux/atomic.h> 13#include <linux/atomic.h>
14 14
15#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
16static inline void rwsem_set_owner(struct rw_semaphore *sem)
17{
18 sem->owner = current;
19}
20
21static inline void rwsem_clear_owner(struct rw_semaphore *sem)
22{
23 sem->owner = NULL;
24}
25
26#else
27static inline void rwsem_set_owner(struct rw_semaphore *sem)
28{
29}
30
31static inline void rwsem_clear_owner(struct rw_semaphore *sem)
32{
33}
34#endif
35
15/* 36/*
16 * lock for reading 37 * lock for reading
17 */ 38 */
@@ -48,6 +69,7 @@ void __sched down_write(struct rw_semaphore *sem)
48 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); 69 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
49 70
50 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 71 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
72 rwsem_set_owner(sem);
51} 73}
52 74
53EXPORT_SYMBOL(down_write); 75EXPORT_SYMBOL(down_write);
@@ -59,8 +81,11 @@ int down_write_trylock(struct rw_semaphore *sem)
59{ 81{
60 int ret = __down_write_trylock(sem); 82 int ret = __down_write_trylock(sem);
61 83
62 if (ret == 1) 84 if (ret == 1) {
63 rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); 85 rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
86 rwsem_set_owner(sem);
87 }
88
64 return ret; 89 return ret;
65} 90}
66 91
@@ -85,6 +110,7 @@ void up_write(struct rw_semaphore *sem)
85{ 110{
86 rwsem_release(&sem->dep_map, 1, _RET_IP_); 111 rwsem_release(&sem->dep_map, 1, _RET_IP_);
87 112
113 rwsem_clear_owner(sem);
88 __up_write(sem); 114 __up_write(sem);
89} 115}
90 116
@@ -99,6 +125,7 @@ void downgrade_write(struct rw_semaphore *sem)
99 * lockdep: a downgraded write will live on as a write 125 * lockdep: a downgraded write will live on as a write
100 * dependency. 126 * dependency.
101 */ 127 */
128 rwsem_clear_owner(sem);
102 __downgrade_write(sem); 129 __downgrade_write(sem);
103} 130}
104 131
@@ -122,6 +149,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
122 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); 149 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
123 150
124 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 151 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
152 rwsem_set_owner(sem);
125} 153}
126 154
127EXPORT_SYMBOL(_down_write_nest_lock); 155EXPORT_SYMBOL(_down_write_nest_lock);
@@ -141,6 +169,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
141 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); 169 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
142 170
143 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 171 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
172 rwsem_set_owner(sem);
144} 173}
145 174
146EXPORT_SYMBOL(down_write_nested); 175EXPORT_SYMBOL(down_write_nested);