diff options
| author | Waiman Long <Waiman.Long@hp.com> | 2014-02-03 07:18:49 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2014-06-06 01:58:28 -0400 |
| commit | 70af2f8a4f48d6cebdf92d533d3aef37853ce6de (patch) | |
| tree | 2a8d7b54053ef9fc7d45794ab28addbd00870819 /kernel/locking | |
| parent | 9161f5409798d52aa8598ff12575fde2327bed84 (diff) | |
locking/rwlocks: Introduce 'qrwlocks' - fair, queued rwlocks
This rwlock uses the arch_spin_lock_t as a waitqueue, and assuming the
arch_spin_lock_t is a fair lock (ticket,mcs etc..) the resulting
rwlock is a fair lock.
It fits in the same 8 bytes as the regular rwlock_t by folding the
reader and writer count into a single integer, using the remaining 4
bytes for the arch_spinlock_t.
Architectures that can single-copy address bytes can optimize
queue_write_unlock() with a 0 write to the LSB (the write count).
Performance as measured by Davidlohr Bueso (rwlock_t -> qrwlock_t):
+--------------+-------------+---------------+
| Workload | #users | delta |
+--------------+-------------+---------------+
| alltests | > 1400 | -4.83% |
| custom | 0-100,> 100 | +1.43%,-1.57% |
| high_systime | > 1000 | -2.61% |
| shared | all | +0.32% |
+--------------+-------------+---------------+
http://www.stgolabs.net/qrwlock-stuff/aim7-results-vs-rwsem_optsin/
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
[peterz: near complete rewrite]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/n/tip-gac1nnl3wvs2ij87zv2xkdzq@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/locking')
| -rw-r--r-- | kernel/locking/Makefile | 1 | ||||
| -rw-r--r-- | kernel/locking/qrwlock.c | 133 |
2 files changed, 134 insertions, 0 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index b8bdcd4785b7..8541bfdfd232 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile | |||
| @@ -24,4 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |||
| 24 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | 24 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
| 25 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | 25 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o |
| 26 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o | 26 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o |
| 27 | obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o | ||
| 27 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | 28 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o |
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c new file mode 100644 index 000000000000..fb5b8ac411a5 --- /dev/null +++ b/kernel/locking/qrwlock.c | |||
| @@ -0,0 +1,133 @@ | |||
| 1 | /* | ||
| 2 | * Queue read/write lock | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P. | ||
| 15 | * | ||
| 16 | * Authors: Waiman Long <waiman.long@hp.com> | ||
| 17 | */ | ||
| 18 | #include <linux/smp.h> | ||
| 19 | #include <linux/bug.h> | ||
| 20 | #include <linux/cpumask.h> | ||
| 21 | #include <linux/percpu.h> | ||
| 22 | #include <linux/hardirq.h> | ||
| 23 | #include <linux/mutex.h> | ||
| 24 | #include <asm/qrwlock.h> | ||
| 25 | |||
| 26 | /** | ||
| 27 | * rspin_until_writer_unlock - inc reader count & spin until writer is gone | ||
| 28 | * @lock : Pointer to queue rwlock structure | ||
| 29 | * @writer: Current queue rwlock writer status byte | ||
| 30 | * | ||
| 31 | * In interrupt context or at the head of the queue, the reader will just | ||
| 32 | * increment the reader count & wait until the writer releases the lock. | ||
| 33 | */ | ||
| 34 | static __always_inline void | ||
| 35 | rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts) | ||
| 36 | { | ||
| 37 | while ((cnts & _QW_WMASK) == _QW_LOCKED) { | ||
| 38 | arch_mutex_cpu_relax(); | ||
| 39 | cnts = smp_load_acquire((u32 *)&lock->cnts); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
/**
 * queue_read_lock_slowpath - acquire read lock of a queue rwlock
 * @lock: Pointer to queue rwlock structure
 *
 * Slow path taken when the read fast path fails (a writer holds or is
 * waiting for the lock). Queues the reader on the internal wait lock,
 * except in interrupt context where queuing could deadlock against a
 * task already spinning in the queue on this CPU.
 */
void queue_read_lock_slowpath(struct qrwlock *lock)
{
	u32 cnts;

	/*
	 * Readers come here when they cannot get the lock without waiting
	 */
	if (unlikely(in_interrupt())) {
		/*
		 * Readers in interrupt context will spin until the lock is
		 * available without waiting in the queue.
		 */
		cnts = smp_load_acquire((u32 *)&lock->cnts);
		rspin_until_writer_unlock(lock, cnts);
		return;
	}
	/*
	 * Back out the speculative _QR_BIAS the fast path added before
	 * joining the queue.
	 * NOTE(review): the fast path is not visible in this file; confirm
	 * it does an unconditional atomic_add(_QR_BIAS) on failure.
	 */
	atomic_sub(_QR_BIAS, &lock->cnts);

	/*
	 * Put the reader into the wait queue
	 */
	arch_spin_lock(&lock->lock);

	/*
	 * At the head of the wait queue now, wait until the writer state
	 * goes to 0 and then try to increment the reader count and get
	 * the lock. It is possible that an incoming writer may steal the
	 * lock in the interim, so it is necessary to check the writer byte
	 * to make sure that the write lock isn't taken.
	 */
	while (atomic_read(&lock->cnts) & _QW_WMASK)
		arch_mutex_cpu_relax();

	/* Re-add the reader bias; the returned value reflects pre-add state. */
	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
	rspin_until_writer_unlock(lock, cnts);

	/*
	 * Signal the next one in queue to become queue head
	 */
	arch_spin_unlock(&lock->lock);
}
EXPORT_SYMBOL(queue_read_lock_slowpath);
| 89 | |||
/**
 * queue_write_lock_slowpath - acquire write lock of a queue rwlock
 * @lock: Pointer to queue rwlock structure
 *
 * Slow path taken when the write fast-path cmpxchg fails. The writer
 * serializes with other queued lockers via the internal wait lock, then
 * claims the lock word in two stages: first _QW_WAITING (to stop new
 * readers from stealing the lock), then _QW_LOCKED once all readers
 * have drained.
 */
void queue_write_lock_slowpath(struct qrwlock *lock)
{
	u32 cnts;

	/* Put the writer into the wait queue */
	arch_spin_lock(&lock->lock);

	/* Try to acquire the lock directly if no reader is present */
	if (!atomic_read(&lock->cnts) &&
	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
		goto unlock;

	/*
	 * Set the waiting flag to notify readers that a writer is pending,
	 * or wait for a previous writer to go away.
	 */
	for (;;) {
		cnts = atomic_read(&lock->cnts);
		/* cmpxchg may fail if a reader changed cnts concurrently; retry. */
		if (!(cnts & _QW_WMASK) &&
		    (atomic_cmpxchg(&lock->cnts, cnts,
				    cnts | _QW_WAITING) == cnts))
			break;

		arch_mutex_cpu_relax();
	}

	/* When no more readers, set the locked flag */
	for (;;) {
		cnts = atomic_read(&lock->cnts);
		/* cnts == _QW_WAITING means reader count is zero and we hold WAITING. */
		if ((cnts == _QW_WAITING) &&
		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
				    _QW_LOCKED) == _QW_WAITING))
			break;

		arch_mutex_cpu_relax();
	}
unlock:
	arch_spin_unlock(&lock->lock);
}
EXPORT_SYMBOL(queue_write_lock_slowpath);
