diff options
Diffstat (limited to 'kernel/locking')
| -rw-r--r-- | kernel/locking/Makefile | 1 | ||||
| -rw-r--r-- | kernel/locking/lockdep_internals.h | 6 | ||||
| -rw-r--r-- | kernel/locking/locktorture.c | 12 | ||||
| -rw-r--r-- | kernel/locking/qrwlock.c | 133 | ||||
| -rw-r--r-- | kernel/locking/rtmutex-debug.h | 5 | ||||
| -rw-r--r-- | kernel/locking/rtmutex.c | 273 | ||||
| -rw-r--r-- | kernel/locking/rtmutex.h | 5 | ||||
| -rw-r--r-- | kernel/locking/rwsem-xadd.c | 274 | ||||
| -rw-r--r-- | kernel/locking/rwsem.c | 31 |
9 files changed, 664 insertions, 76 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index b8bdcd4785b7..8541bfdfd232 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile | |||
| @@ -24,4 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |||
| 24 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | 24 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
| 25 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | 25 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o |
| 26 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o | 26 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o |
| 27 | obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o | ||
| 27 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | 28 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o |
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 4f560cfedc8f..51c4b24b6328 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h | |||
| @@ -54,9 +54,9 @@ enum { | |||
| 54 | * table (if it's not there yet), and we check it for lock order | 54 | * table (if it's not there yet), and we check it for lock order |
| 55 | * conflicts and deadlocks. | 55 | * conflicts and deadlocks. |
| 56 | */ | 56 | */ |
| 57 | #define MAX_LOCKDEP_ENTRIES 16384UL | 57 | #define MAX_LOCKDEP_ENTRIES 32768UL |
| 58 | 58 | ||
| 59 | #define MAX_LOCKDEP_CHAINS_BITS 15 | 59 | #define MAX_LOCKDEP_CHAINS_BITS 16 |
| 60 | #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) | 60 | #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) |
| 61 | 61 | ||
| 62 | #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) | 62 | #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) |
| @@ -65,7 +65,7 @@ enum { | |||
| 65 | * Stack-trace: tightly packed array of stack backtrace | 65 | * Stack-trace: tightly packed array of stack backtrace |
| 66 | * addresses. Protected by the hash_lock. | 66 | * addresses. Protected by the hash_lock. |
| 67 | */ | 67 | */ |
| 68 | #define MAX_STACK_TRACE_ENTRIES 262144UL | 68 | #define MAX_STACK_TRACE_ENTRIES 524288UL |
| 69 | 69 | ||
| 70 | extern struct list_head all_lock_classes; | 70 | extern struct list_head all_lock_classes; |
| 71 | extern struct lock_chain lock_chains[]; | 71 | extern struct lock_chain lock_chains[]; |
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index f26b1a18e34e..0955b885d0dc 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c | |||
| @@ -82,14 +82,14 @@ struct lock_writer_stress_stats { | |||
| 82 | }; | 82 | }; |
| 83 | static struct lock_writer_stress_stats *lwsa; | 83 | static struct lock_writer_stress_stats *lwsa; |
| 84 | 84 | ||
| 85 | #if defined(MODULE) || defined(CONFIG_LOCK_TORTURE_TEST_RUNNABLE) | 85 | #if defined(MODULE) |
| 86 | #define LOCKTORTURE_RUNNABLE_INIT 1 | 86 | #define LOCKTORTURE_RUNNABLE_INIT 1 |
| 87 | #else | 87 | #else |
| 88 | #define LOCKTORTURE_RUNNABLE_INIT 0 | 88 | #define LOCKTORTURE_RUNNABLE_INIT 0 |
| 89 | #endif | 89 | #endif |
| 90 | int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; | 90 | int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; |
| 91 | module_param(locktorture_runnable, int, 0444); | 91 | module_param(locktorture_runnable, int, 0444); |
| 92 | MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at boot"); | 92 | MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at module init"); |
| 93 | 93 | ||
| 94 | /* Forward reference. */ | 94 | /* Forward reference. */ |
| 95 | static void lock_torture_cleanup(void); | 95 | static void lock_torture_cleanup(void); |
| @@ -216,10 +216,11 @@ static int lock_torture_writer(void *arg) | |||
| 216 | static DEFINE_TORTURE_RANDOM(rand); | 216 | static DEFINE_TORTURE_RANDOM(rand); |
| 217 | 217 | ||
| 218 | VERBOSE_TOROUT_STRING("lock_torture_writer task started"); | 218 | VERBOSE_TOROUT_STRING("lock_torture_writer task started"); |
| 219 | set_user_nice(current, 19); | 219 | set_user_nice(current, MAX_NICE); |
| 220 | 220 | ||
| 221 | do { | 221 | do { |
| 222 | schedule_timeout_uninterruptible(1); | 222 | if ((torture_random(&rand) & 0xfffff) == 0) |
| 223 | schedule_timeout_uninterruptible(1); | ||
| 223 | cur_ops->writelock(); | 224 | cur_ops->writelock(); |
| 224 | if (WARN_ON_ONCE(lock_is_write_held)) | 225 | if (WARN_ON_ONCE(lock_is_write_held)) |
| 225 | lwsp->n_write_lock_fail++; | 226 | lwsp->n_write_lock_fail++; |
| @@ -354,7 +355,8 @@ static int __init lock_torture_init(void) | |||
| 354 | &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, | 355 | &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, |
| 355 | }; | 356 | }; |
| 356 | 357 | ||
| 357 | torture_init_begin(torture_type, verbose, &locktorture_runnable); | 358 | if (!torture_init_begin(torture_type, verbose, &locktorture_runnable)) |
| 359 | return -EBUSY; | ||
| 358 | 360 | ||
| 359 | /* Process args and tell the world that the torturer is on the job. */ | 361 | /* Process args and tell the world that the torturer is on the job. */ |
| 360 | for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { | 362 | for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { |
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c new file mode 100644 index 000000000000..fb5b8ac411a5 --- /dev/null +++ b/kernel/locking/qrwlock.c | |||
| @@ -0,0 +1,133 @@ | |||
| 1 | /* | ||
| 2 | * Queue read/write lock | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P. | ||
| 15 | * | ||
| 16 | * Authors: Waiman Long <waiman.long@hp.com> | ||
| 17 | */ | ||
| 18 | #include <linux/smp.h> | ||
| 19 | #include <linux/bug.h> | ||
| 20 | #include <linux/cpumask.h> | ||
| 21 | #include <linux/percpu.h> | ||
| 22 | #include <linux/hardirq.h> | ||
| 23 | #include <linux/mutex.h> | ||
| 24 | #include <asm/qrwlock.h> | ||
| 25 | |||
| 26 | /** | ||
| 27 | * rspin_until_writer_unlock - inc reader count & spin until writer is gone | ||
| 28 | * @lock : Pointer to queue rwlock structure | ||
| 29 | * @writer: Current queue rwlock writer status byte | ||
| 30 | * | ||
| 31 | * In interrupt context or at the head of the queue, the reader will just | ||
| 32 | * increment the reader count & wait until the writer releases the lock. | ||
| 33 | */ | ||
| 34 | static __always_inline void | ||
| 35 | rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts) | ||
| 36 | { | ||
| 37 | while ((cnts & _QW_WMASK) == _QW_LOCKED) { | ||
| 38 | arch_mutex_cpu_relax(); | ||
| 39 | cnts = smp_load_acquire((u32 *)&lock->cnts); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | /** | ||
| 44 | * queue_read_lock_slowpath - acquire read lock of a queue rwlock | ||
| 45 | * @lock: Pointer to queue rwlock structure | ||
| 46 | */ | ||
| 47 | void queue_read_lock_slowpath(struct qrwlock *lock) | ||
| 48 | { | ||
| 49 | u32 cnts; | ||
| 50 | |||
| 51 | /* | ||
| 52 | * Readers come here when they cannot get the lock without waiting | ||
| 53 | */ | ||
| 54 | if (unlikely(in_interrupt())) { | ||
| 55 | /* | ||
| 56 | * Readers in interrupt context will spin until the lock is | ||
| 57 | * available without waiting in the queue. | ||
| 58 | */ | ||
| 59 | cnts = smp_load_acquire((u32 *)&lock->cnts); | ||
| 60 | rspin_until_writer_unlock(lock, cnts); | ||
| 61 | return; | ||
| 62 | } | ||
| 63 | atomic_sub(_QR_BIAS, &lock->cnts); | ||
| 64 | |||
| 65 | /* | ||
| 66 | * Put the reader into the wait queue | ||
| 67 | */ | ||
| 68 | arch_spin_lock(&lock->lock); | ||
| 69 | |||
| 70 | /* | ||
| 71 | * At the head of the wait queue now, wait until the writer state | ||
| 72 | * goes to 0 and then try to increment the reader count and get | ||
| 73 | * the lock. It is possible that an incoming writer may steal the | ||
| 74 | * lock in the interim, so it is necessary to check the writer byte | ||
| 75 | * to make sure that the write lock isn't taken. | ||
| 76 | */ | ||
| 77 | while (atomic_read(&lock->cnts) & _QW_WMASK) | ||
| 78 | arch_mutex_cpu_relax(); | ||
| 79 | |||
| 80 | cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS; | ||
| 81 | rspin_until_writer_unlock(lock, cnts); | ||
| 82 | |||
| 83 | /* | ||
| 84 | * Signal the next one in queue to become queue head | ||
| 85 | */ | ||
| 86 | arch_spin_unlock(&lock->lock); | ||
| 87 | } | ||
| 88 | EXPORT_SYMBOL(queue_read_lock_slowpath); | ||
| 89 | |||
| 90 | /** | ||
| 91 | * queue_write_lock_slowpath - acquire write lock of a queue rwlock | ||
| 92 | * @lock : Pointer to queue rwlock structure | ||
| 93 | */ | ||
| 94 | void queue_write_lock_slowpath(struct qrwlock *lock) | ||
| 95 | { | ||
| 96 | u32 cnts; | ||
| 97 | |||
| 98 | /* Put the writer into the wait queue */ | ||
| 99 | arch_spin_lock(&lock->lock); | ||
| 100 | |||
| 101 | /* Try to acquire the lock directly if no reader is present */ | ||
| 102 | if (!atomic_read(&lock->cnts) && | ||
| 103 | (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)) | ||
| 104 | goto unlock; | ||
| 105 | |||
| 106 | /* | ||
| 107 | * Set the waiting flag to notify readers that a writer is pending, | ||
| 108 | * or wait for a previous writer to go away. | ||
| 109 | */ | ||
| 110 | for (;;) { | ||
| 111 | cnts = atomic_read(&lock->cnts); | ||
| 112 | if (!(cnts & _QW_WMASK) && | ||
| 113 | (atomic_cmpxchg(&lock->cnts, cnts, | ||
| 114 | cnts | _QW_WAITING) == cnts)) | ||
| 115 | break; | ||
| 116 | |||
| 117 | arch_mutex_cpu_relax(); | ||
| 118 | } | ||
| 119 | |||
| 120 | /* When no more readers, set the locked flag */ | ||
| 121 | for (;;) { | ||
| 122 | cnts = atomic_read(&lock->cnts); | ||
| 123 | if ((cnts == _QW_WAITING) && | ||
| 124 | (atomic_cmpxchg(&lock->cnts, _QW_WAITING, | ||
| 125 | _QW_LOCKED) == _QW_WAITING)) | ||
| 126 | break; | ||
| 127 | |||
| 128 | arch_mutex_cpu_relax(); | ||
| 129 | } | ||
| 130 | unlock: | ||
| 131 | arch_spin_unlock(&lock->lock); | ||
| 132 | } | ||
| 133 | EXPORT_SYMBOL(queue_write_lock_slowpath); | ||
diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h index 14193d596d78..ab29b6a22669 100644 --- a/kernel/locking/rtmutex-debug.h +++ b/kernel/locking/rtmutex-debug.h | |||
| @@ -31,3 +31,8 @@ static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, | |||
| 31 | { | 31 | { |
| 32 | return (waiter != NULL); | 32 | return (waiter != NULL); |
| 33 | } | 33 | } |
| 34 | |||
| 35 | static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) | ||
| 36 | { | ||
| 37 | debug_rt_mutex_print_deadlock(w); | ||
| 38 | } | ||
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index aa4dff04b594..fc605941b9b8 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c | |||
| @@ -83,6 +83,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | |||
| 83 | owner = *p; | 83 | owner = *p; |
| 84 | } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); | 84 | } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); |
| 85 | } | 85 | } |
| 86 | |||
| 87 | /* | ||
| 88 | * Safe fastpath aware unlock: | ||
| 89 | * 1) Clear the waiters bit | ||
| 90 | * 2) Drop lock->wait_lock | ||
| 91 | * 3) Try to unlock the lock with cmpxchg | ||
| 92 | */ | ||
| 93 | static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) | ||
| 94 | __releases(lock->wait_lock) | ||
| 95 | { | ||
| 96 | struct task_struct *owner = rt_mutex_owner(lock); | ||
| 97 | |||
| 98 | clear_rt_mutex_waiters(lock); | ||
| 99 | raw_spin_unlock(&lock->wait_lock); | ||
| 100 | /* | ||
| 101 | * If a new waiter comes in between the unlock and the cmpxchg | ||
| 102 | * we have two situations: | ||
| 103 | * | ||
| 104 | * unlock(wait_lock); | ||
| 105 | * lock(wait_lock); | ||
| 106 | * cmpxchg(p, owner, 0) == owner | ||
| 107 | * mark_rt_mutex_waiters(lock); | ||
| 108 | * acquire(lock); | ||
| 109 | * or: | ||
| 110 | * | ||
| 111 | * unlock(wait_lock); | ||
| 112 | * lock(wait_lock); | ||
| 113 | * mark_rt_mutex_waiters(lock); | ||
| 114 | * | ||
| 115 | * cmpxchg(p, owner, 0) != owner | ||
| 116 | * enqueue_waiter(); | ||
| 117 | * unlock(wait_lock); | ||
| 118 | * lock(wait_lock); | ||
| 119 | * wake waiter(); | ||
| 120 | * unlock(wait_lock); | ||
| 121 | * lock(wait_lock); | ||
| 122 | * acquire(lock); | ||
| 123 | */ | ||
| 124 | return rt_mutex_cmpxchg(lock, owner, NULL); | ||
| 125 | } | ||
| 126 | |||
| 86 | #else | 127 | #else |
| 87 | # define rt_mutex_cmpxchg(l,c,n) (0) | 128 | # define rt_mutex_cmpxchg(l,c,n) (0) |
| 88 | static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | 129 | static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) |
| @@ -90,6 +131,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | |||
| 90 | lock->owner = (struct task_struct *) | 131 | lock->owner = (struct task_struct *) |
| 91 | ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); | 132 | ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); |
| 92 | } | 133 | } |
| 134 | |||
| 135 | /* | ||
| 136 | * Simple slow path only version: lock->owner is protected by lock->wait_lock. | ||
| 137 | */ | ||
| 138 | static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) | ||
| 139 | __releases(lock->wait_lock) | ||
| 140 | { | ||
| 141 | lock->owner = NULL; | ||
| 142 | raw_spin_unlock(&lock->wait_lock); | ||
| 143 | return true; | ||
| 144 | } | ||
| 93 | #endif | 145 | #endif |
| 94 | 146 | ||
| 95 | static inline int | 147 | static inline int |
| @@ -260,27 +312,36 @@ static void rt_mutex_adjust_prio(struct task_struct *task) | |||
| 260 | */ | 312 | */ |
| 261 | int max_lock_depth = 1024; | 313 | int max_lock_depth = 1024; |
| 262 | 314 | ||
| 315 | static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) | ||
| 316 | { | ||
| 317 | return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; | ||
| 318 | } | ||
| 319 | |||
| 263 | /* | 320 | /* |
| 264 | * Adjust the priority chain. Also used for deadlock detection. | 321 | * Adjust the priority chain. Also used for deadlock detection. |
| 265 | * Decreases task's usage by one - may thus free the task. | 322 | * Decreases task's usage by one - may thus free the task. |
| 266 | * | 323 | * |
| 267 | * @task: the task owning the mutex (owner) for which a chain walk is probably | 324 | * @task: the task owning the mutex (owner) for which a chain walk is |
| 268 | * needed | 325 | * probably needed |
| 269 | * @deadlock_detect: do we have to carry out deadlock detection? | 326 | * @deadlock_detect: do we have to carry out deadlock detection? |
| 270 | * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck | 327 | * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck |
| 271 | * things for a task that has just got its priority adjusted, and | 328 | * things for a task that has just got its priority adjusted, and |
| 272 | * is waiting on a mutex) | 329 | * is waiting on a mutex) |
| 330 | * @next_lock: the mutex on which the owner of @orig_lock was blocked before | ||
| 331 | * we dropped its pi_lock. Is never dereferenced, only used for | ||
| 332 | * comparison to detect lock chain changes. | ||
| 273 | * @orig_waiter: rt_mutex_waiter struct for the task that has just donated | 333 | * @orig_waiter: rt_mutex_waiter struct for the task that has just donated |
| 274 | * its priority to the mutex owner (can be NULL in the case | 334 | * its priority to the mutex owner (can be NULL in the case |
| 275 | * depicted above or if the top waiter is gone away and we are | 335 | * depicted above or if the top waiter is gone away and we are |
| 276 | * actually deboosting the owner) | 336 | * actually deboosting the owner) |
| 277 | * @top_task: the current top waiter | 337 | * @top_task: the current top waiter |
| 278 | * | 338 | * |
| 279 | * Returns 0 or -EDEADLK. | 339 | * Returns 0 or -EDEADLK. |
| 280 | */ | 340 | */ |
| 281 | static int rt_mutex_adjust_prio_chain(struct task_struct *task, | 341 | static int rt_mutex_adjust_prio_chain(struct task_struct *task, |
| 282 | int deadlock_detect, | 342 | int deadlock_detect, |
| 283 | struct rt_mutex *orig_lock, | 343 | struct rt_mutex *orig_lock, |
| 344 | struct rt_mutex *next_lock, | ||
| 284 | struct rt_mutex_waiter *orig_waiter, | 345 | struct rt_mutex_waiter *orig_waiter, |
| 285 | struct task_struct *top_task) | 346 | struct task_struct *top_task) |
| 286 | { | 347 | { |
| @@ -314,7 +375,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
| 314 | } | 375 | } |
| 315 | put_task_struct(task); | 376 | put_task_struct(task); |
| 316 | 377 | ||
| 317 | return deadlock_detect ? -EDEADLK : 0; | 378 | return -EDEADLK; |
| 318 | } | 379 | } |
| 319 | retry: | 380 | retry: |
| 320 | /* | 381 | /* |
| @@ -339,13 +400,32 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
| 339 | goto out_unlock_pi; | 400 | goto out_unlock_pi; |
| 340 | 401 | ||
| 341 | /* | 402 | /* |
| 403 | * We dropped all locks after taking a refcount on @task, so | ||
| 404 | * the task might have moved on in the lock chain or even left | ||
| 405 | * the chain completely and blocks now on an unrelated lock or | ||
| 406 | * on @orig_lock. | ||
| 407 | * | ||
| 408 | * We stored the lock on which @task was blocked in @next_lock, | ||
| 409 | * so we can detect the chain change. | ||
| 410 | */ | ||
| 411 | if (next_lock != waiter->lock) | ||
| 412 | goto out_unlock_pi; | ||
| 413 | |||
| 414 | /* | ||
| 342 | * Drop out, when the task has no waiters. Note, | 415 | * Drop out, when the task has no waiters. Note, |
| 343 | * top_waiter can be NULL, when we are in the deboosting | 416 | * top_waiter can be NULL, when we are in the deboosting |
| 344 | * mode! | 417 | * mode! |
| 345 | */ | 418 | */ |
| 346 | if (top_waiter && (!task_has_pi_waiters(task) || | 419 | if (top_waiter) { |
| 347 | top_waiter != task_top_pi_waiter(task))) | 420 | if (!task_has_pi_waiters(task)) |
| 348 | goto out_unlock_pi; | 421 | goto out_unlock_pi; |
| 422 | /* | ||
| 423 | * If deadlock detection is off, we stop here if we | ||
| 424 | * are not the top pi waiter of the task. | ||
| 425 | */ | ||
| 426 | if (!detect_deadlock && top_waiter != task_top_pi_waiter(task)) | ||
| 427 | goto out_unlock_pi; | ||
| 428 | } | ||
| 349 | 429 | ||
| 350 | /* | 430 | /* |
| 351 | * When deadlock detection is off then we check, if further | 431 | * When deadlock detection is off then we check, if further |
| @@ -361,11 +441,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
| 361 | goto retry; | 441 | goto retry; |
| 362 | } | 442 | } |
| 363 | 443 | ||
| 364 | /* Deadlock detection */ | 444 | /* |
| 445 | * Deadlock detection. If the lock is the same as the original | ||
| 446 | * lock which caused us to walk the lock chain or if the | ||
| 447 | * current lock is owned by the task which initiated the chain | ||
| 448 | * walk, we detected a deadlock. | ||
| 449 | */ | ||
| 365 | if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { | 450 | if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { |
| 366 | debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); | 451 | debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); |
| 367 | raw_spin_unlock(&lock->wait_lock); | 452 | raw_spin_unlock(&lock->wait_lock); |
| 368 | ret = deadlock_detect ? -EDEADLK : 0; | 453 | ret = -EDEADLK; |
| 369 | goto out_unlock_pi; | 454 | goto out_unlock_pi; |
| 370 | } | 455 | } |
| 371 | 456 | ||
| @@ -410,11 +495,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
| 410 | __rt_mutex_adjust_prio(task); | 495 | __rt_mutex_adjust_prio(task); |
| 411 | } | 496 | } |
| 412 | 497 | ||
| 498 | /* | ||
| 499 | * Check whether the task which owns the current lock is pi | ||
| 500 | * blocked itself. If yes we store a pointer to the lock for | ||
| 501 | * the lock chain change detection above. After we dropped | ||
| 502 | * task->pi_lock next_lock cannot be dereferenced anymore. | ||
| 503 | */ | ||
| 504 | next_lock = task_blocked_on_lock(task); | ||
| 505 | |||
| 413 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | 506 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); |
| 414 | 507 | ||
| 415 | top_waiter = rt_mutex_top_waiter(lock); | 508 | top_waiter = rt_mutex_top_waiter(lock); |
| 416 | raw_spin_unlock(&lock->wait_lock); | 509 | raw_spin_unlock(&lock->wait_lock); |
| 417 | 510 | ||
| 511 | /* | ||
| 512 | * We reached the end of the lock chain. Stop right here. No | ||
| 513 | * point to go back just to figure that out. | ||
| 514 | */ | ||
| 515 | if (!next_lock) | ||
| 516 | goto out_put_task; | ||
| 517 | |||
| 418 | if (!detect_deadlock && waiter != top_waiter) | 518 | if (!detect_deadlock && waiter != top_waiter) |
| 419 | goto out_put_task; | 519 | goto out_put_task; |
| 420 | 520 | ||
| @@ -524,8 +624,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
| 524 | { | 624 | { |
| 525 | struct task_struct *owner = rt_mutex_owner(lock); | 625 | struct task_struct *owner = rt_mutex_owner(lock); |
| 526 | struct rt_mutex_waiter *top_waiter = waiter; | 626 | struct rt_mutex_waiter *top_waiter = waiter; |
| 527 | unsigned long flags; | 627 | struct rt_mutex *next_lock; |
| 528 | int chain_walk = 0, res; | 628 | int chain_walk = 0, res; |
| 629 | unsigned long flags; | ||
| 630 | |||
| 631 | /* | ||
| 632 | * Early deadlock detection. We really don't want the task to | ||
| 633 | * enqueue on itself just to untangle the mess later. It's not | ||
| 634 | * only an optimization. We drop the locks, so another waiter | ||
| 635 | * can come in before the chain walk detects the deadlock. So | ||
| 636 | * the other will detect the deadlock and return -EDEADLOCK, | ||
| 637 | * which is wrong, as the other waiter is not in a deadlock | ||
| 638 | * situation. | ||
| 639 | */ | ||
| 640 | if (owner == task) | ||
| 641 | return -EDEADLK; | ||
| 529 | 642 | ||
| 530 | raw_spin_lock_irqsave(&task->pi_lock, flags); | 643 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
| 531 | __rt_mutex_adjust_prio(task); | 644 | __rt_mutex_adjust_prio(task); |
| @@ -545,20 +658,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
| 545 | if (!owner) | 658 | if (!owner) |
| 546 | return 0; | 659 | return 0; |
| 547 | 660 | ||
| 661 | raw_spin_lock_irqsave(&owner->pi_lock, flags); | ||
| 548 | if (waiter == rt_mutex_top_waiter(lock)) { | 662 | if (waiter == rt_mutex_top_waiter(lock)) { |
| 549 | raw_spin_lock_irqsave(&owner->pi_lock, flags); | ||
| 550 | rt_mutex_dequeue_pi(owner, top_waiter); | 663 | rt_mutex_dequeue_pi(owner, top_waiter); |
| 551 | rt_mutex_enqueue_pi(owner, waiter); | 664 | rt_mutex_enqueue_pi(owner, waiter); |
| 552 | 665 | ||
| 553 | __rt_mutex_adjust_prio(owner); | 666 | __rt_mutex_adjust_prio(owner); |
| 554 | if (owner->pi_blocked_on) | 667 | if (owner->pi_blocked_on) |
| 555 | chain_walk = 1; | 668 | chain_walk = 1; |
| 556 | raw_spin_unlock_irqrestore(&owner->pi_lock, flags); | 669 | } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { |
| 557 | } | ||
| 558 | else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) | ||
| 559 | chain_walk = 1; | 670 | chain_walk = 1; |
| 671 | } | ||
| 672 | |||
| 673 | /* Store the lock on which owner is blocked or NULL */ | ||
| 674 | next_lock = task_blocked_on_lock(owner); | ||
| 560 | 675 | ||
| 561 | if (!chain_walk) | 676 | raw_spin_unlock_irqrestore(&owner->pi_lock, flags); |
| 677 | /* | ||
| 678 | * Even if full deadlock detection is on, if the owner is not | ||
| 679 | * blocked itself, we can avoid finding this out in the chain | ||
| 680 | * walk. | ||
| 681 | */ | ||
| 682 | if (!chain_walk || !next_lock) | ||
| 562 | return 0; | 683 | return 0; |
| 563 | 684 | ||
| 564 | /* | 685 | /* |
| @@ -570,8 +691,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
| 570 | 691 | ||
| 571 | raw_spin_unlock(&lock->wait_lock); | 692 | raw_spin_unlock(&lock->wait_lock); |
| 572 | 693 | ||
| 573 | res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, | 694 | res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, |
| 574 | task); | 695 | next_lock, waiter, task); |
| 575 | 696 | ||
| 576 | raw_spin_lock(&lock->wait_lock); | 697 | raw_spin_lock(&lock->wait_lock); |
| 577 | 698 | ||
| @@ -581,7 +702,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
| 581 | /* | 702 | /* |
| 582 | * Wake up the next waiter on the lock. | 703 | * Wake up the next waiter on the lock. |
| 583 | * | 704 | * |
| 584 | * Remove the top waiter from the current tasks waiter list and wake it up. | 705 | * Remove the top waiter from the current tasks pi waiter list and |
| 706 | * wake it up. | ||
| 585 | * | 707 | * |
| 586 | * Called with lock->wait_lock held. | 708 | * Called with lock->wait_lock held. |
| 587 | */ | 709 | */ |
| @@ -602,10 +724,23 @@ static void wakeup_next_waiter(struct rt_mutex *lock) | |||
| 602 | */ | 724 | */ |
| 603 | rt_mutex_dequeue_pi(current, waiter); | 725 | rt_mutex_dequeue_pi(current, waiter); |
| 604 | 726 | ||
| 605 | rt_mutex_set_owner(lock, NULL); | 727 | /* |
| 728 | * As we are waking up the top waiter, and the waiter stays | ||
| 729 | * queued on the lock until it gets the lock, this lock | ||
| 730 | * obviously has waiters. Just set the bit here and this has | ||
| 731 | * the added benefit of forcing all new tasks into the | ||
| 732 | * slow path making sure no task of lower priority than | ||
| 733 | * the top waiter can steal this lock. | ||
| 734 | */ | ||
| 735 | lock->owner = (void *) RT_MUTEX_HAS_WAITERS; | ||
| 606 | 736 | ||
| 607 | raw_spin_unlock_irqrestore(¤t->pi_lock, flags); | 737 | raw_spin_unlock_irqrestore(¤t->pi_lock, flags); |
| 608 | 738 | ||
| 739 | /* | ||
| 740 | * It's safe to dereference waiter as it cannot go away as | ||
| 741 | * long as we hold lock->wait_lock. The waiter task needs to | ||
| 742 | * acquire it in order to dequeue the waiter. | ||
| 743 | */ | ||
| 609 | wake_up_process(waiter->task); | 744 | wake_up_process(waiter->task); |
| 610 | } | 745 | } |
| 611 | 746 | ||
| @@ -620,8 +755,8 @@ static void remove_waiter(struct rt_mutex *lock, | |||
| 620 | { | 755 | { |
| 621 | int first = (waiter == rt_mutex_top_waiter(lock)); | 756 | int first = (waiter == rt_mutex_top_waiter(lock)); |
| 622 | struct task_struct *owner = rt_mutex_owner(lock); | 757 | struct task_struct *owner = rt_mutex_owner(lock); |
| 758 | struct rt_mutex *next_lock = NULL; | ||
| 623 | unsigned long flags; | 759 | unsigned long flags; |
| 624 | int chain_walk = 0; | ||
| 625 | 760 | ||
| 626 | raw_spin_lock_irqsave(¤t->pi_lock, flags); | 761 | raw_spin_lock_irqsave(¤t->pi_lock, flags); |
| 627 | rt_mutex_dequeue(lock, waiter); | 762 | rt_mutex_dequeue(lock, waiter); |
| @@ -645,13 +780,13 @@ static void remove_waiter(struct rt_mutex *lock, | |||
| 645 | } | 780 | } |
| 646 | __rt_mutex_adjust_prio(owner); | 781 | __rt_mutex_adjust_prio(owner); |
| 647 | 782 | ||
| 648 | if (owner->pi_blocked_on) | 783 | /* Store the lock on which owner is blocked or NULL */ |
| 649 | chain_walk = 1; | 784 | next_lock = task_blocked_on_lock(owner); |
| 650 | 785 | ||
| 651 | raw_spin_unlock_irqrestore(&owner->pi_lock, flags); | 786 | raw_spin_unlock_irqrestore(&owner->pi_lock, flags); |
| 652 | } | 787 | } |
| 653 | 788 | ||
| 654 | if (!chain_walk) | 789 | if (!next_lock) |
| 655 | return; | 790 | return; |
| 656 | 791 | ||
| 657 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ | 792 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ |
| @@ -659,7 +794,7 @@ static void remove_waiter(struct rt_mutex *lock, | |||
| 659 | 794 | ||
| 660 | raw_spin_unlock(&lock->wait_lock); | 795 | raw_spin_unlock(&lock->wait_lock); |
| 661 | 796 | ||
| 662 | rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); | 797 | rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current); |
| 663 | 798 | ||
| 664 | raw_spin_lock(&lock->wait_lock); | 799 | raw_spin_lock(&lock->wait_lock); |
| 665 | } | 800 | } |
| @@ -672,6 +807,7 @@ static void remove_waiter(struct rt_mutex *lock, | |||
| 672 | void rt_mutex_adjust_pi(struct task_struct *task) | 807 | void rt_mutex_adjust_pi(struct task_struct *task) |
| 673 | { | 808 | { |
| 674 | struct rt_mutex_waiter *waiter; | 809 | struct rt_mutex_waiter *waiter; |
| 810 | struct rt_mutex *next_lock; | ||
| 675 | unsigned long flags; | 811 | unsigned long flags; |
| 676 | 812 | ||
| 677 | raw_spin_lock_irqsave(&task->pi_lock, flags); | 813 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
| @@ -682,12 +818,13 @@ void rt_mutex_adjust_pi(struct task_struct *task) | |||
| 682 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | 818 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); |
| 683 | return; | 819 | return; |
| 684 | } | 820 | } |
| 685 | 821 | next_lock = waiter->lock; | |
| 686 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | 822 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); |
| 687 | 823 | ||
| 688 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ | 824 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ |
| 689 | get_task_struct(task); | 825 | get_task_struct(task); |
| 690 | rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); | 826 | |
| 827 | rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task); | ||
| 691 | } | 828 | } |
| 692 | 829 | ||
| 693 | /** | 830 | /** |
| @@ -739,6 +876,26 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 739 | return ret; | 876 | return ret; |
| 740 | } | 877 | } |
| 741 | 878 | ||
| 879 | static void rt_mutex_handle_deadlock(int res, int detect_deadlock, | ||
| 880 | struct rt_mutex_waiter *w) | ||
| 881 | { | ||
| 882 | /* | ||
| 883 | * If the result is not -EDEADLOCK or the caller requested | ||
| 884 | * deadlock detection, nothing to do here. | ||
| 885 | */ | ||
| 886 | if (res != -EDEADLOCK || detect_deadlock) | ||
| 887 | return; | ||
| 888 | |||
| 889 | /* | ||
| 890 | * Yell lowdly and stop the task right here. | ||
| 891 | */ | ||
| 892 | rt_mutex_print_deadlock(w); | ||
| 893 | while (1) { | ||
| 894 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 895 | schedule(); | ||
| 896 | } | ||
| 897 | } | ||
| 898 | |||
| 742 | /* | 899 | /* |
| 743 | * Slow path lock function: | 900 | * Slow path lock function: |
| 744 | */ | 901 | */ |
| @@ -778,8 +935,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 778 | 935 | ||
| 779 | set_current_state(TASK_RUNNING); | 936 | set_current_state(TASK_RUNNING); |
| 780 | 937 | ||
| 781 | if (unlikely(ret)) | 938 | if (unlikely(ret)) { |
| 782 | remove_waiter(lock, &waiter); | 939 | remove_waiter(lock, &waiter); |
| 940 | rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter); | ||
| 941 | } | ||
| 783 | 942 | ||
| 784 | /* | 943 | /* |
| 785 | * try_to_take_rt_mutex() sets the waiter bit | 944 | * try_to_take_rt_mutex() sets the waiter bit |
| @@ -835,12 +994,49 @@ rt_mutex_slowunlock(struct rt_mutex *lock) | |||
| 835 | 994 | ||
| 836 | rt_mutex_deadlock_account_unlock(current); | 995 | rt_mutex_deadlock_account_unlock(current); |
| 837 | 996 | ||
| 838 | if (!rt_mutex_has_waiters(lock)) { | 997 | /* |
| 839 | lock->owner = NULL; | 998 | * We must be careful here if the fast path is enabled. If we |
| 840 | raw_spin_unlock(&lock->wait_lock); | 999 | * have no waiters queued we cannot set owner to NULL here |
| 841 | return; | 1000 | * because of: |
| 1001 | * | ||
| 1002 | * foo->lock->owner = NULL; | ||
| 1003 | * rtmutex_lock(foo->lock); <- fast path | ||
| 1004 | * free = atomic_dec_and_test(foo->refcnt); | ||
| 1005 | * rtmutex_unlock(foo->lock); <- fast path | ||
| 1006 | * if (free) | ||
| 1007 | * kfree(foo); | ||
| 1008 | * raw_spin_unlock(foo->lock->wait_lock); | ||
| 1009 | * | ||
| 1010 | * So for the fastpath enabled kernel: | ||
| 1011 | * | ||
| 1012 | * Nothing can set the waiters bit as long as we hold | ||
| 1013 | * lock->wait_lock. So we do the following sequence: | ||
| 1014 | * | ||
| 1015 | * owner = rt_mutex_owner(lock); | ||
| 1016 | * clear_rt_mutex_waiters(lock); | ||
| 1017 | * raw_spin_unlock(&lock->wait_lock); | ||
| 1018 | * if (cmpxchg(&lock->owner, owner, 0) == owner) | ||
| 1019 | * return; | ||
| 1020 | * goto retry; | ||
| 1021 | * | ||
| 1022 | * The fastpath disabled variant is simple as all access to | ||
| 1023 | * lock->owner is serialized by lock->wait_lock: | ||
| 1024 | * | ||
| 1025 | * lock->owner = NULL; | ||
| 1026 | * raw_spin_unlock(&lock->wait_lock); | ||
| 1027 | */ | ||
| 1028 | while (!rt_mutex_has_waiters(lock)) { | ||
| 1029 | /* Drops lock->wait_lock ! */ | ||
| 1030 | if (unlock_rt_mutex_safe(lock) == true) | ||
| 1031 | return; | ||
| 1032 | /* Relock the rtmutex and try again */ | ||
| 1033 | raw_spin_lock(&lock->wait_lock); | ||
| 842 | } | 1034 | } |
| 843 | 1035 | ||
| 1036 | /* | ||
| 1037 | * The wakeup next waiter path does not suffer from the above | ||
| 1038 | * race. See the comments there. | ||
| 1039 | */ | ||
| 844 | wakeup_next_waiter(lock); | 1040 | wakeup_next_waiter(lock); |
| 845 | 1041 | ||
| 846 | raw_spin_unlock(&lock->wait_lock); | 1042 | raw_spin_unlock(&lock->wait_lock); |
| @@ -1088,7 +1284,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |||
| 1088 | return 1; | 1284 | return 1; |
| 1089 | } | 1285 | } |
| 1090 | 1286 | ||
| 1091 | ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); | 1287 | /* We enforce deadlock detection for futexes */ |
| 1288 | ret = task_blocks_on_rt_mutex(lock, waiter, task, 1); | ||
| 1092 | 1289 | ||
| 1093 | if (ret && !rt_mutex_owner(lock)) { | 1290 | if (ret && !rt_mutex_owner(lock)) { |
| 1094 | /* | 1291 | /* |
diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h index a1a1dd06421d..f6a1f3c133b1 100644 --- a/kernel/locking/rtmutex.h +++ b/kernel/locking/rtmutex.h | |||
| @@ -24,3 +24,8 @@ | |||
| 24 | #define debug_rt_mutex_print_deadlock(w) do { } while (0) | 24 | #define debug_rt_mutex_print_deadlock(w) do { } while (0) |
| 25 | #define debug_rt_mutex_detect_deadlock(w,d) (d) | 25 | #define debug_rt_mutex_detect_deadlock(w,d) (d) |
| 26 | #define debug_rt_mutex_reset_waiter(w) do { } while (0) | 26 | #define debug_rt_mutex_reset_waiter(w) do { } while (0) |
| 27 | |||
| 28 | static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) | ||
| 29 | { | ||
| 30 | WARN(1, "rtmutex deadlock detected\n"); | ||
| 31 | } | ||
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 1d66e08e897d..dacc32142fcc 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
| @@ -5,11 +5,66 @@ | |||
| 5 | * | 5 | * |
| 6 | * Writer lock-stealing by Alex Shi <alex.shi@intel.com> | 6 | * Writer lock-stealing by Alex Shi <alex.shi@intel.com> |
| 7 | * and Michel Lespinasse <walken@google.com> | 7 | * and Michel Lespinasse <walken@google.com> |
| 8 | * | ||
| 9 | * Optimistic spinning by Tim Chen <tim.c.chen@intel.com> | ||
| 10 | * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes. | ||
| 8 | */ | 11 | */ |
| 9 | #include <linux/rwsem.h> | 12 | #include <linux/rwsem.h> |
| 10 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
| 11 | #include <linux/init.h> | 14 | #include <linux/init.h> |
| 12 | #include <linux/export.h> | 15 | #include <linux/export.h> |
| 16 | #include <linux/sched/rt.h> | ||
| 17 | |||
| 18 | #include "mcs_spinlock.h" | ||
| 19 | |||
| 20 | /* | ||
| 21 | * Guide to the rw_semaphore's count field for common values. | ||
| 22 | * (32-bit case illustrated, similar for 64-bit) | ||
| 23 | * | ||
| 24 | * 0x0000000X (1) X readers active or attempting lock, no writer waiting | ||
| 25 | * X = #active_readers + #readers attempting to lock | ||
| 26 | * (X*ACTIVE_BIAS) | ||
| 27 | * | ||
| 28 | * 0x00000000 rwsem is unlocked, and no one is waiting for the lock or | ||
| 29 | * attempting to read lock or write lock. | ||
| 30 | * | ||
| 31 | * 0xffff000X (1) X readers active or attempting lock, with waiters for lock | ||
| 32 | * X = #active readers + # readers attempting lock | ||
| 33 | * (X*ACTIVE_BIAS + WAITING_BIAS) | ||
| 34 | * (2) 1 writer attempting lock, no waiters for lock | ||
| 35 | * X-1 = #active readers + #readers attempting lock | ||
| 36 | * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS) | ||
| 37 | * (3) 1 writer active, no waiters for lock | ||
| 38 | * X-1 = #active readers + #readers attempting lock | ||
| 39 | * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS) | ||
| 40 | * | ||
| 41 | * 0xffff0001 (1) 1 reader active or attempting lock, waiters for lock | ||
| 42 | * (WAITING_BIAS + ACTIVE_BIAS) | ||
| 43 | * (2) 1 writer active or attempting lock, no waiters for lock | ||
| 44 | * (ACTIVE_WRITE_BIAS) | ||
| 45 | * | ||
| 46 | * 0xffff0000 (1) There are writers or readers queued but none active | ||
| 47 | * or in the process of attempting lock. | ||
| 48 | * (WAITING_BIAS) | ||
| 49 | * Note: writer can attempt to steal lock for this count by adding | ||
| 50 | * ACTIVE_WRITE_BIAS in cmpxchg and checking the old count | ||
| 51 | * | ||
| 52 | * 0xfffe0001 (1) 1 writer active, or attempting lock. Waiters on queue. | ||
| 53 | * (ACTIVE_WRITE_BIAS + WAITING_BIAS) | ||
| 54 | * | ||
| 55 | * Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking | ||
| 56 | * the count becomes more than 0 for successful lock acquisition, | ||
| 57 | * i.e. the case where there are only readers or nobody has lock. | ||
| 58 | * (1st and 2nd case above). | ||
| 59 | * | ||
| 60 | * Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and | ||
| 61 | * checking the count becomes ACTIVE_WRITE_BIAS for successful lock | ||
| 62 | * acquisition (i.e. nobody else has lock or attempts lock). If | ||
| 63 | * unsuccessful, in rwsem_down_write_failed, we'll check to see if there | ||
| 64 | * are only waiters but none active (5th case above), and attempt to | ||
| 65 | * steal the lock. | ||
| 66 | * | ||
| 67 | */ | ||
| 13 | 68 | ||
| 14 | /* | 69 | /* |
| 15 | * Initialize an rwsem: | 70 | * Initialize an rwsem: |
| @@ -27,6 +82,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, | |||
| 27 | sem->count = RWSEM_UNLOCKED_VALUE; | 82 | sem->count = RWSEM_UNLOCKED_VALUE; |
| 28 | raw_spin_lock_init(&sem->wait_lock); | 83 | raw_spin_lock_init(&sem->wait_lock); |
| 29 | INIT_LIST_HEAD(&sem->wait_list); | 84 | INIT_LIST_HEAD(&sem->wait_list); |
| 85 | #ifdef CONFIG_SMP | ||
| 86 | sem->owner = NULL; | ||
| 87 | sem->osq = NULL; | ||
| 88 | #endif | ||
| 30 | } | 89 | } |
| 31 | 90 | ||
| 32 | EXPORT_SYMBOL(__init_rwsem); | 91 | EXPORT_SYMBOL(__init_rwsem); |
| @@ -141,7 +200,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) | |||
| 141 | } | 200 | } |
| 142 | 201 | ||
| 143 | /* | 202 | /* |
| 144 | * wait for the read lock to be granted | 203 | * Wait for the read lock to be granted |
| 145 | */ | 204 | */ |
| 146 | __visible | 205 | __visible |
| 147 | struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | 206 | struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) |
| @@ -188,64 +247,221 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | |||
| 188 | return sem; | 247 | return sem; |
| 189 | } | 248 | } |
| 190 | 249 | ||
| 250 | static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) | ||
| 251 | { | ||
| 252 | if (!(count & RWSEM_ACTIVE_MASK)) { | ||
| 253 | /* try acquiring the write lock */ | ||
| 254 | if (sem->count == RWSEM_WAITING_BIAS && | ||
| 255 | cmpxchg(&sem->count, RWSEM_WAITING_BIAS, | ||
| 256 | RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { | ||
| 257 | if (!list_is_singular(&sem->wait_list)) | ||
| 258 | rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); | ||
| 259 | return true; | ||
| 260 | } | ||
| 261 | } | ||
| 262 | return false; | ||
| 263 | } | ||
| 264 | |||
| 265 | #ifdef CONFIG_SMP | ||
| 191 | /* | 266 | /* |
| 192 | * wait until we successfully acquire the write lock | 267 | * Try to acquire write lock before the writer has been put on wait queue. |
| 268 | */ | ||
| 269 | static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) | ||
| 270 | { | ||
| 271 | long old, count = ACCESS_ONCE(sem->count); | ||
| 272 | |||
| 273 | while (true) { | ||
| 274 | if (!(count == 0 || count == RWSEM_WAITING_BIAS)) | ||
| 275 | return false; | ||
| 276 | |||
| 277 | old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); | ||
| 278 | if (old == count) | ||
| 279 | return true; | ||
| 280 | |||
| 281 | count = old; | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) | ||
| 286 | { | ||
| 287 | struct task_struct *owner; | ||
| 288 | bool on_cpu = true; | ||
| 289 | |||
| 290 | if (need_resched()) | ||
| 291 | return 0; | ||
| 292 | |||
| 293 | rcu_read_lock(); | ||
| 294 | owner = ACCESS_ONCE(sem->owner); | ||
| 295 | if (owner) | ||
| 296 | on_cpu = owner->on_cpu; | ||
| 297 | rcu_read_unlock(); | ||
| 298 | |||
| 299 | /* | ||
| 300 | * If sem->owner is not set, the rwsem owner may have | ||
| 301 | * just acquired it and not set the owner yet or the rwsem | ||
| 302 | * has been released. | ||
| 303 | */ | ||
| 304 | return on_cpu; | ||
| 305 | } | ||
| 306 | |||
| 307 | static inline bool owner_running(struct rw_semaphore *sem, | ||
| 308 | struct task_struct *owner) | ||
| 309 | { | ||
| 310 | if (sem->owner != owner) | ||
| 311 | return false; | ||
| 312 | |||
| 313 | /* | ||
| 314 | * Ensure we emit the owner->on_cpu, dereference _after_ checking | ||
| 315 | * sem->owner still matches owner, if that fails, owner might | ||
| 316 | * point to free()d memory, if it still matches, the rcu_read_lock() | ||
| 317 | * ensures the memory stays valid. | ||
| 318 | */ | ||
| 319 | barrier(); | ||
| 320 | |||
| 321 | return owner->on_cpu; | ||
| 322 | } | ||
| 323 | |||
| 324 | static noinline | ||
| 325 | bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) | ||
| 326 | { | ||
| 327 | rcu_read_lock(); | ||
| 328 | while (owner_running(sem, owner)) { | ||
| 329 | if (need_resched()) | ||
| 330 | break; | ||
| 331 | |||
| 332 | arch_mutex_cpu_relax(); | ||
| 333 | } | ||
| 334 | rcu_read_unlock(); | ||
| 335 | |||
| 336 | /* | ||
| 337 | * We break out the loop above on need_resched() or when the | ||
| 338 | * owner changed, which is a sign for heavy contention. Return | ||
| 339 | * success only when sem->owner is NULL. | ||
| 340 | */ | ||
| 341 | return sem->owner == NULL; | ||
| 342 | } | ||
| 343 | |||
| 344 | static bool rwsem_optimistic_spin(struct rw_semaphore *sem) | ||
| 345 | { | ||
| 346 | struct task_struct *owner; | ||
| 347 | bool taken = false; | ||
| 348 | |||
| 349 | preempt_disable(); | ||
| 350 | |||
| 351 | /* sem->wait_lock should not be held when doing optimistic spinning */ | ||
| 352 | if (!rwsem_can_spin_on_owner(sem)) | ||
| 353 | goto done; | ||
| 354 | |||
| 355 | if (!osq_lock(&sem->osq)) | ||
| 356 | goto done; | ||
| 357 | |||
| 358 | while (true) { | ||
| 359 | owner = ACCESS_ONCE(sem->owner); | ||
| 360 | if (owner && !rwsem_spin_on_owner(sem, owner)) | ||
| 361 | break; | ||
| 362 | |||
| 363 | /* wait_lock will be acquired if write_lock is obtained */ | ||
| 364 | if (rwsem_try_write_lock_unqueued(sem)) { | ||
| 365 | taken = true; | ||
| 366 | break; | ||
| 367 | } | ||
| 368 | |||
| 369 | /* | ||
| 370 | * When there's no owner, we might have preempted between the | ||
| 371 | * owner acquiring the lock and setting the owner field. If | ||
| 372 | * we're an RT task that will live-lock because we won't let | ||
| 373 | * the owner complete. | ||
| 374 | */ | ||
| 375 | if (!owner && (need_resched() || rt_task(current))) | ||
| 376 | break; | ||
| 377 | |||
| 378 | /* | ||
| 379 | * The cpu_relax() call is a compiler barrier which forces | ||
| 380 | * everything in this loop to be re-loaded. We don't need | ||
| 381 | * memory barriers as we'll eventually observe the right | ||
| 382 | * values at the cost of a few extra spins. | ||
| 383 | */ | ||
| 384 | arch_mutex_cpu_relax(); | ||
| 385 | } | ||
| 386 | osq_unlock(&sem->osq); | ||
| 387 | done: | ||
| 388 | preempt_enable(); | ||
| 389 | return taken; | ||
| 390 | } | ||
| 391 | |||
| 392 | #else | ||
| 393 | static bool rwsem_optimistic_spin(struct rw_semaphore *sem) | ||
| 394 | { | ||
| 395 | return false; | ||
| 396 | } | ||
| 397 | #endif | ||
| 398 | |||
| 399 | /* | ||
| 400 | * Wait until we successfully acquire the write lock | ||
| 193 | */ | 401 | */ |
| 194 | __visible | 402 | __visible |
| 195 | struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) | 403 | struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) |
| 196 | { | 404 | { |
| 197 | long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; | 405 | long count; |
| 406 | bool waiting = true; /* any queued threads before us */ | ||
| 198 | struct rwsem_waiter waiter; | 407 | struct rwsem_waiter waiter; |
| 199 | struct task_struct *tsk = current; | ||
| 200 | 408 | ||
| 201 | /* set up my own style of waitqueue */ | 409 | /* undo write bias from down_write operation, stop active locking */ |
| 202 | waiter.task = tsk; | 410 | count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem); |
| 411 | |||
| 412 | /* do optimistic spinning and steal lock if possible */ | ||
| 413 | if (rwsem_optimistic_spin(sem)) | ||
| 414 | return sem; | ||
| 415 | |||
| 416 | /* | ||
| 417 | * Optimistic spinning failed, proceed to the slowpath | ||
| 418 | * and block until we can acquire the sem. | ||
| 419 | */ | ||
| 420 | waiter.task = current; | ||
| 203 | waiter.type = RWSEM_WAITING_FOR_WRITE; | 421 | waiter.type = RWSEM_WAITING_FOR_WRITE; |
| 204 | 422 | ||
| 205 | raw_spin_lock_irq(&sem->wait_lock); | 423 | raw_spin_lock_irq(&sem->wait_lock); |
| 424 | |||
| 425 | /* account for this before adding a new element to the list */ | ||
| 206 | if (list_empty(&sem->wait_list)) | 426 | if (list_empty(&sem->wait_list)) |
| 207 | adjustment += RWSEM_WAITING_BIAS; | 427 | waiting = false; |
| 428 | |||
| 208 | list_add_tail(&waiter.list, &sem->wait_list); | 429 | list_add_tail(&waiter.list, &sem->wait_list); |
| 209 | 430 | ||
| 210 | /* we're now waiting on the lock, but no longer actively locking */ | 431 | /* we're now waiting on the lock, but no longer actively locking */ |
| 211 | count = rwsem_atomic_update(adjustment, sem); | 432 | if (waiting) { |
| 433 | count = ACCESS_ONCE(sem->count); | ||
| 434 | |||
| 435 | /* | ||
| 436 | * If there were already threads queued before us and there are | ||
| 437 | * no active writers, the lock must be read owned; so we try to | ||
| 438 | * wake any read locks that were queued ahead of us. | ||
| 439 | */ | ||
| 440 | if (count > RWSEM_WAITING_BIAS) | ||
| 441 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); | ||
| 212 | 442 | ||
| 213 | /* If there were already threads queued before us and there are no | 443 | } else |
| 214 | * active writers, the lock must be read owned; so we try to wake | 444 | count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); |
| 215 | * any read locks that were queued ahead of us. */ | ||
| 216 | if (count > RWSEM_WAITING_BIAS && | ||
| 217 | adjustment == -RWSEM_ACTIVE_WRITE_BIAS) | ||
| 218 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); | ||
| 219 | 445 | ||
| 220 | /* wait until we successfully acquire the lock */ | 446 | /* wait until we successfully acquire the lock */ |
| 221 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 447 | set_current_state(TASK_UNINTERRUPTIBLE); |
| 222 | while (true) { | 448 | while (true) { |
| 223 | if (!(count & RWSEM_ACTIVE_MASK)) { | 449 | if (rwsem_try_write_lock(count, sem)) |
| 224 | /* Try acquiring the write lock. */ | 450 | break; |
| 225 | count = RWSEM_ACTIVE_WRITE_BIAS; | ||
| 226 | if (!list_is_singular(&sem->wait_list)) | ||
| 227 | count += RWSEM_WAITING_BIAS; | ||
| 228 | |||
| 229 | if (sem->count == RWSEM_WAITING_BIAS && | ||
| 230 | cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == | ||
| 231 | RWSEM_WAITING_BIAS) | ||
| 232 | break; | ||
| 233 | } | ||
| 234 | |||
| 235 | raw_spin_unlock_irq(&sem->wait_lock); | 451 | raw_spin_unlock_irq(&sem->wait_lock); |
| 236 | 452 | ||
| 237 | /* Block until there are no active lockers. */ | 453 | /* Block until there are no active lockers. */ |
| 238 | do { | 454 | do { |
| 239 | schedule(); | 455 | schedule(); |
| 240 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 456 | set_current_state(TASK_UNINTERRUPTIBLE); |
| 241 | } while ((count = sem->count) & RWSEM_ACTIVE_MASK); | 457 | } while ((count = sem->count) & RWSEM_ACTIVE_MASK); |
| 242 | 458 | ||
| 243 | raw_spin_lock_irq(&sem->wait_lock); | 459 | raw_spin_lock_irq(&sem->wait_lock); |
| 244 | } | 460 | } |
| 461 | __set_current_state(TASK_RUNNING); | ||
| 245 | 462 | ||
| 246 | list_del(&waiter.list); | 463 | list_del(&waiter.list); |
| 247 | raw_spin_unlock_irq(&sem->wait_lock); | 464 | raw_spin_unlock_irq(&sem->wait_lock); |
| 248 | tsk->state = TASK_RUNNING; | ||
| 249 | 465 | ||
| 250 | return sem; | 466 | return sem; |
| 251 | } | 467 | } |
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index cfff1435bdfb..42f806de49d4 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c | |||
| @@ -12,6 +12,27 @@ | |||
| 12 | 12 | ||
| 13 | #include <linux/atomic.h> | 13 | #include <linux/atomic.h> |
| 14 | 14 | ||
| 15 | #if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM) | ||
| 16 | static inline void rwsem_set_owner(struct rw_semaphore *sem) | ||
| 17 | { | ||
| 18 | sem->owner = current; | ||
| 19 | } | ||
| 20 | |||
| 21 | static inline void rwsem_clear_owner(struct rw_semaphore *sem) | ||
| 22 | { | ||
| 23 | sem->owner = NULL; | ||
| 24 | } | ||
| 25 | |||
| 26 | #else | ||
| 27 | static inline void rwsem_set_owner(struct rw_semaphore *sem) | ||
| 28 | { | ||
| 29 | } | ||
| 30 | |||
| 31 | static inline void rwsem_clear_owner(struct rw_semaphore *sem) | ||
| 32 | { | ||
| 33 | } | ||
| 34 | #endif | ||
| 35 | |||
| 15 | /* | 36 | /* |
| 16 | * lock for reading | 37 | * lock for reading |
| 17 | */ | 38 | */ |
| @@ -48,6 +69,7 @@ void __sched down_write(struct rw_semaphore *sem) | |||
| 48 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | 69 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); |
| 49 | 70 | ||
| 50 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 71 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
| 72 | rwsem_set_owner(sem); | ||
| 51 | } | 73 | } |
| 52 | 74 | ||
| 53 | EXPORT_SYMBOL(down_write); | 75 | EXPORT_SYMBOL(down_write); |
| @@ -59,8 +81,11 @@ int down_write_trylock(struct rw_semaphore *sem) | |||
| 59 | { | 81 | { |
| 60 | int ret = __down_write_trylock(sem); | 82 | int ret = __down_write_trylock(sem); |
| 61 | 83 | ||
| 62 | if (ret == 1) | 84 | if (ret == 1) { |
| 63 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); | 85 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); |
| 86 | rwsem_set_owner(sem); | ||
| 87 | } | ||
| 88 | |||
| 64 | return ret; | 89 | return ret; |
| 65 | } | 90 | } |
| 66 | 91 | ||
| @@ -85,6 +110,7 @@ void up_write(struct rw_semaphore *sem) | |||
| 85 | { | 110 | { |
| 86 | rwsem_release(&sem->dep_map, 1, _RET_IP_); | 111 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
| 87 | 112 | ||
| 113 | rwsem_clear_owner(sem); | ||
| 88 | __up_write(sem); | 114 | __up_write(sem); |
| 89 | } | 115 | } |
| 90 | 116 | ||
| @@ -99,6 +125,7 @@ void downgrade_write(struct rw_semaphore *sem) | |||
| 99 | * lockdep: a downgraded write will live on as a write | 125 | * lockdep: a downgraded write will live on as a write |
| 100 | * dependency. | 126 | * dependency. |
| 101 | */ | 127 | */ |
| 128 | rwsem_clear_owner(sem); | ||
| 102 | __downgrade_write(sem); | 129 | __downgrade_write(sem); |
| 103 | } | 130 | } |
| 104 | 131 | ||
| @@ -122,6 +149,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) | |||
| 122 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); | 149 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); |
| 123 | 150 | ||
| 124 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 151 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
| 152 | rwsem_set_owner(sem); | ||
| 125 | } | 153 | } |
| 126 | 154 | ||
| 127 | EXPORT_SYMBOL(_down_write_nest_lock); | 155 | EXPORT_SYMBOL(_down_write_nest_lock); |
| @@ -141,6 +169,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) | |||
| 141 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | 169 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); |
| 142 | 170 | ||
| 143 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 171 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
| 172 | rwsem_set_owner(sem); | ||
| 144 | } | 173 | } |
| 145 | 174 | ||
| 146 | EXPORT_SYMBOL(down_write_nested); | 175 | EXPORT_SYMBOL(down_write_nested); |
