diff options
| author | Waiman Long <Waiman.Long@hp.com> | 2014-02-03 07:18:49 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2014-06-06 01:58:28 -0400 |
| commit | 70af2f8a4f48d6cebdf92d533d3aef37853ce6de (patch) | |
| tree | 2a8d7b54053ef9fc7d45794ab28addbd00870819 | |
| parent | 9161f5409798d52aa8598ff12575fde2327bed84 (diff) | |
locking/rwlocks: Introduce 'qrwlocks' - fair, queued rwlocks
This rwlock uses the arch_spin_lock_t as a waitqueue, and assuming the
arch_spin_lock_t is a fair lock (ticket,mcs etc..) the resulting
rwlock is a fair lock.
It fits in the same 8 bytes as the regular rwlock_t by folding the
reader and writer count into a single integer, using the remaining 4
bytes for the arch_spinlock_t.
Architectures that can single-copy address bytes can optimize
queue_write_unlock() with a 0 write to the LSB (the write count).
Performance as measured by Davidlohr Bueso (rwlock_t -> qrwlock_t):
+--------------+-------------+---------------+
| Workload | #users | delta |
+--------------+-------------+---------------+
| alltests | > 1400 | -4.83% |
| custom | 0-100,> 100 | +1.43%,-1.57% |
| high_systime | > 1000 | -2.61% |
| shared | all | +0.32% |
+--------------+-------------+---------------+
http://www.stgolabs.net/qrwlock-stuff/aim7-results-vs-rwsem_optsin/
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
[peterz: near complete rewrite]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Paul E.McKenney" <paulmck@linux.vnet.ibm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/n/tip-gac1nnl3wvs2ij87zv2xkdzq@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
| -rw-r--r-- | include/asm-generic/qrwlock.h | 166 | ||||
| -rw-r--r-- | include/asm-generic/qrwlock_types.h | 21 | ||||
| -rw-r--r-- | kernel/Kconfig.locks | 7 | ||||
| -rw-r--r-- | kernel/locking/Makefile | 1 | ||||
| -rw-r--r-- | kernel/locking/qrwlock.c | 133 |
5 files changed, 328 insertions, 0 deletions
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h new file mode 100644 index 000000000000..6383d54bf983 --- /dev/null +++ b/include/asm-generic/qrwlock.h | |||
| @@ -0,0 +1,166 @@ | |||
| 1 | /* | ||
| 2 | * Queue read/write lock | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P. | ||
| 15 | * | ||
| 16 | * Authors: Waiman Long <waiman.long@hp.com> | ||
| 17 | */ | ||
| 18 | #ifndef __ASM_GENERIC_QRWLOCK_H | ||
| 19 | #define __ASM_GENERIC_QRWLOCK_H | ||
| 20 | |||
| 21 | #include <linux/atomic.h> | ||
| 22 | #include <asm/barrier.h> | ||
| 23 | #include <asm/processor.h> | ||
| 24 | |||
| 25 | #include <asm-generic/qrwlock_types.h> | ||
| 26 | |||
| 27 | /* | ||
| 28 | * Writer states & reader shift and bias | ||
| 29 | */ | ||
| 30 | #define _QW_WAITING 1 /* A writer is waiting */ | ||
| 31 | #define _QW_LOCKED 0xff /* A writer holds the lock */ | ||
| 32 | #define _QW_WMASK 0xff /* Writer mask */ | ||
| 33 | #define _QR_SHIFT 8 /* Reader count shift */ | ||
| 34 | #define _QR_BIAS (1U << _QR_SHIFT) | ||
| 35 | |||
| 36 | /* | ||
| 37 | * External function declarations | ||
| 38 | */ | ||
| 39 | extern void queue_read_lock_slowpath(struct qrwlock *lock); | ||
| 40 | extern void queue_write_lock_slowpath(struct qrwlock *lock); | ||
| 41 | |||
/**
 * queue_read_can_lock - would read_trylock() succeed?
 * @lock: Pointer to queue rwlock structure
 *
 * Return: 1 if no writer holds or waits for the lock, 0 otherwise.
 * The answer may already be stale by the time the caller acts on it.
 */
static inline int queue_read_can_lock(struct qrwlock *lock)
{
	/* Readers are excluded by the writer byte (waiting OR locked). */
	return !(atomic_read(&lock->cnts) & _QW_WMASK);
}
| 50 | |||
| 51 | /** | ||
| 52 | * queue_write_can_lock- would write_trylock() succeed? | ||
| 53 | * @lock: Pointer to queue rwlock structure | ||
| 54 | */ | ||
| 55 | static inline int queue_write_can_lock(struct qrwlock *lock) | ||
| 56 | { | ||
| 57 | return !atomic_read(&lock->cnts); | ||
| 58 | } | ||
| 59 | |||
/**
 * queue_read_trylock - try to acquire read lock of a queue rwlock
 * @lock : Pointer to queue rwlock structure
 * Return: 1 if lock acquired, 0 if failed
 */
static inline int queue_read_trylock(struct qrwlock *lock)
{
	u32 cnts;

	cnts = atomic_read(&lock->cnts);
	if (likely(!(cnts & _QW_WMASK))) {
		/*
		 * Optimistically add the reader bias. A writer may have
		 * slipped in between the plain read above and the add, so
		 * the writer byte must be re-checked in the returned value.
		 */
		cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
		if (likely(!(cnts & _QW_WMASK)))
			return 1;
		/* A writer got in first - back our reader count out again. */
		atomic_sub(_QR_BIAS, &lock->cnts);
	}
	return 0;
}
| 78 | |||
| 79 | /** | ||
| 80 | * queue_write_trylock - try to acquire write lock of a queue rwlock | ||
| 81 | * @lock : Pointer to queue rwlock structure | ||
| 82 | * Return: 1 if lock acquired, 0 if failed | ||
| 83 | */ | ||
| 84 | static inline int queue_write_trylock(struct qrwlock *lock) | ||
| 85 | { | ||
| 86 | u32 cnts; | ||
| 87 | |||
| 88 | cnts = atomic_read(&lock->cnts); | ||
| 89 | if (unlikely(cnts)) | ||
| 90 | return 0; | ||
| 91 | |||
| 92 | return likely(atomic_cmpxchg(&lock->cnts, | ||
| 93 | cnts, cnts | _QW_LOCKED) == cnts); | ||
| 94 | } | ||
/**
 * queue_read_lock - acquire read lock of a queue rwlock
 * @lock: Pointer to queue rwlock structure
 */
static inline void queue_read_lock(struct qrwlock *lock)
{
	u32 cnts;

	/* Fastpath: unconditionally add the reader bias. */
	cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
	if (likely(!(cnts & _QW_WMASK)))
		return;

	/* The slowpath will decrement the reader count, if necessary. */
	queue_read_lock_slowpath(lock);
}
| 110 | |||
/**
 * queue_write_lock - acquire write lock of a queue rwlock
 * @lock : Pointer to queue rwlock structure
 */
static inline void queue_write_lock(struct qrwlock *lock)
{
	/* Optimize for the uncontended case: cnts == 0 means nobody holds it. */
	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
		return;

	queue_write_lock_slowpath(lock);
}
| 123 | |||
/**
 * queue_read_unlock - release read lock of a queue rwlock
 * @lock : Pointer to queue rwlock structure
 */
static inline void queue_read_unlock(struct qrwlock *lock)
{
	/*
	 * Atomically decrement the reader count. The barrier orders the
	 * critical section before the release of our reader slot.
	 */
	smp_mb__before_atomic();
	atomic_sub(_QR_BIAS, &lock->cnts);
}
| 136 | |||
#ifndef queue_write_unlock
/**
 * queue_write_unlock - release write lock of a queue rwlock
 * @lock : Pointer to queue rwlock structure
 *
 * Generic fallback; an architecture that can do a single-copy atomic
 * byte store may override this with a plain 0 write to the writer byte.
 */
static inline void queue_write_unlock(struct qrwlock *lock)
{
	/*
	 * If the writer field is atomic, it can be cleared directly.
	 * Otherwise, an atomic subtraction will be used to clear it.
	 */
	smp_mb__before_atomic();
	atomic_sub(_QW_LOCKED, &lock->cnts);
}
#endif
| 152 | |||
| 153 | /* | ||
| 154 | * Remapping rwlock architecture specific functions to the corresponding | ||
| 155 | * queue rwlock functions. | ||
| 156 | */ | ||
| 157 | #define arch_read_can_lock(l) queue_read_can_lock(l) | ||
| 158 | #define arch_write_can_lock(l) queue_write_can_lock(l) | ||
| 159 | #define arch_read_lock(l) queue_read_lock(l) | ||
| 160 | #define arch_write_lock(l) queue_write_lock(l) | ||
| 161 | #define arch_read_trylock(l) queue_read_trylock(l) | ||
| 162 | #define arch_write_trylock(l) queue_write_trylock(l) | ||
| 163 | #define arch_read_unlock(l) queue_read_unlock(l) | ||
| 164 | #define arch_write_unlock(l) queue_write_unlock(l) | ||
| 165 | |||
| 166 | #endif /* __ASM_GENERIC_QRWLOCK_H */ | ||
diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h new file mode 100644 index 000000000000..4d76f24df518 --- /dev/null +++ b/include/asm-generic/qrwlock_types.h | |||
| @@ -0,0 +1,21 @@ | |||
#ifndef __ASM_GENERIC_QRWLOCK_TYPES_H
#define __ASM_GENERIC_QRWLOCK_TYPES_H

#include <linux/types.h>
#include <asm/spinlock_types.h>

/*
 * The queue read/write lock data structure
 */

typedef struct qrwlock {
	atomic_t	cnts;	/* writer state in the low byte, reader count above */
	arch_spinlock_t	lock;	/* spinlock used as the fair wait queue */
} arch_rwlock_t;

/* Static initializer: no readers, no writer, queue lock unlocked. */
#define	__ARCH_RW_LOCK_UNLOCKED {		\
	.cnts = ATOMIC_INIT(0),			\
	.lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
}

#endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index d2b32ac27a39..35536d9c0964 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks | |||
| @@ -223,3 +223,10 @@ endif | |||
| 223 | config MUTEX_SPIN_ON_OWNER | 223 | config MUTEX_SPIN_ON_OWNER |
| 224 | def_bool y | 224 | def_bool y |
| 225 | depends on SMP && !DEBUG_MUTEXES | 225 | depends on SMP && !DEBUG_MUTEXES |
| 226 | |||
| 227 | config ARCH_USE_QUEUE_RWLOCK | ||
| 228 | bool | ||
| 229 | |||
| 230 | config QUEUE_RWLOCK | ||
| 231 | def_bool y if ARCH_USE_QUEUE_RWLOCK | ||
| 232 | depends on SMP | ||
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index b8bdcd4785b7..8541bfdfd232 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile | |||
| @@ -24,4 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |||
| 24 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | 24 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
| 25 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | 25 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o |
| 26 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o | 26 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o |
| 27 | obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o | ||
| 27 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | 28 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o |
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c new file mode 100644 index 000000000000..fb5b8ac411a5 --- /dev/null +++ b/kernel/locking/qrwlock.c | |||
| @@ -0,0 +1,133 @@ | |||
| 1 | /* | ||
| 2 | * Queue read/write lock | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P. | ||
| 15 | * | ||
| 16 | * Authors: Waiman Long <waiman.long@hp.com> | ||
| 17 | */ | ||
| 18 | #include <linux/smp.h> | ||
| 19 | #include <linux/bug.h> | ||
| 20 | #include <linux/cpumask.h> | ||
| 21 | #include <linux/percpu.h> | ||
| 22 | #include <linux/hardirq.h> | ||
| 23 | #include <linux/mutex.h> | ||
| 24 | #include <asm/qrwlock.h> | ||
| 25 | |||
/**
 * rspin_until_writer_unlock - spin until the writer releases the lock
 * @lock: Pointer to queue rwlock structure
 * @cnts: Current queue rwlock lock count value
 *
 * The caller has already added itself to the reader count; this helper
 * only spins until the writer byte drops out of the _QW_LOCKED state.
 * A writer that is merely waiting (_QW_WAITING) does not block us here.
 */
static __always_inline void
rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
{
	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
		arch_mutex_cpu_relax();
		/* Acquire load pairs with the writer's release on unlock. */
		cnts = smp_load_acquire((u32 *)&lock->cnts);
	}
}
| 42 | |||
/**
 * queue_read_lock_slowpath - acquire read lock of a queue rwlock
 * @lock: Pointer to queue rwlock structure
 *
 * Called from queue_read_lock() after the fastpath already added
 * _QR_BIAS but found the writer byte set.
 */
void queue_read_lock_slowpath(struct qrwlock *lock)
{
	u32 cnts;

	/*
	 * Readers come here when they cannot get the lock without waiting
	 */
	if (unlikely(in_interrupt())) {
		/*
		 * Readers in interrupt context will spin until the lock is
		 * available without waiting in the queue. The fastpath's
		 * reader bias is kept, so we only wait out a locked writer.
		 */
		cnts = smp_load_acquire((u32 *)&lock->cnts);
		rspin_until_writer_unlock(lock, cnts);
		return;
	}
	/* Undo the fastpath's optimistic bias before queueing up. */
	atomic_sub(_QR_BIAS, &lock->cnts);

	/*
	 * Put the reader into the wait queue
	 */
	arch_spin_lock(&lock->lock);

	/*
	 * At the head of the wait queue now, wait until the writer state
	 * goes to 0 and then try to increment the reader count and get
	 * the lock. It is possible that an incoming writer may steal the
	 * lock in the interim, so it is necessary to check the writer byte
	 * to make sure that the write lock isn't taken.
	 */
	while (atomic_read(&lock->cnts) & _QW_WMASK)
		arch_mutex_cpu_relax();

	/* Re-add our bias; subtract it back out to see the pre-add state. */
	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
	rspin_until_writer_unlock(lock, cnts);

	/*
	 * Signal the next one in queue to become queue head
	 */
	arch_spin_unlock(&lock->lock);
}
EXPORT_SYMBOL(queue_read_lock_slowpath);
| 89 | |||
/**
 * queue_write_lock_slowpath - acquire write lock of a queue rwlock
 * @lock : Pointer to queue rwlock structure
 *
 * Called from queue_write_lock() after the fastpath cmpxchg failed.
 * The queue spinlock serializes writers (and queued readers), so only
 * one writer at a time runs the state machine below.
 */
void queue_write_lock_slowpath(struct qrwlock *lock)
{
	u32 cnts;

	/* Put the writer into the wait queue */
	arch_spin_lock(&lock->lock);

	/* Try to acquire the lock directly if no reader is present */
	if (!atomic_read(&lock->cnts) &&
	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
		goto unlock;

	/*
	 * Set the waiting flag to notify readers that a writer is pending,
	 * or wait for a previous writer to go away.
	 */
	for (;;) {
		cnts = atomic_read(&lock->cnts);
		if (!(cnts & _QW_WMASK) &&
		    (atomic_cmpxchg(&lock->cnts, cnts,
				    cnts | _QW_WAITING) == cnts))
			break;

		arch_mutex_cpu_relax();
	}

	/* When no more readers, set the locked flag */
	for (;;) {
		cnts = atomic_read(&lock->cnts);
		/* cnts == _QW_WAITING means reader count is 0 and we wait. */
		if ((cnts == _QW_WAITING) &&
		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
				    _QW_LOCKED) == _QW_WAITING))
			break;

		arch_mutex_cpu_relax();
	}
unlock:
	arch_spin_unlock(&lock->lock);
}
EXPORT_SYMBOL(queue_write_lock_slowpath);
