author    Michel Lespinasse <walken@google.com>  2013-05-07 09:45:59 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-05-07 10:20:16 -0400
commit    fe6e674c6187d4f452a679ced7e95262bd517936 (patch)
tree      ba04818049ec225a541f278940532473d3005b9c /lib/rwsem.c
parent    8cf5322ce69afea1fab6a6270db24d057d664798 (diff)
rwsem: implement support for write lock stealing on the fastpath
When we decide to wake up readers, we must first grant them as many read locks as necessary, and then actually wake up all these readers. But in order to know how many read shares to grant, we must first count the readers at the head of the queue. This might take a while if there are many readers, and we want to be protected against a writer stealing the lock while we're counting. To that end, we grant the first reader lock before counting how many more readers are queued.

We also require some adjustments to the wake_type semantics.

RWSEM_WAKE_NO_ACTIVE used to mean that we had found the count to be RWSEM_WAITING_BIAS, in which case the rwsem was known to be free as nobody could steal it while we hold the wait_lock. This doesn't make sense once we implement fastpath write lock stealing, so we now use RWSEM_WAKE_ANY in that case.

Similarly, when rwsem_down_write_failed found that a read lock was active, it would use RWSEM_WAKE_READ_OWNED which signalled that new readers could be woken without checking first that the rwsem was available. We can't do that anymore since the existing readers might release their read locks, and a writer could steal the lock before we wake up additional readers. So, we have to use a new RWSEM_WAKE_READERS value to indicate we only want to wake readers, but we don't currently hold any read lock.

Signed-off-by: Michel Lespinasse <walken@google.com>
Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
Acked-by: Davidlohr Bueso <davidlohr.bueso@hp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
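[Editor's illustration] To make the grant-before-count step above concrete, here is a minimal userspace sketch of the protocol. It is not part of the patch: sem->count is modeled with a C11 atomic, rwsem_atomic_update() is modeled by the hypothetical helper update(), and try_reader_grant_model() is an illustrative name, not a kernel function. The bias values assume the 32-bit layout used by the kernel's generic rwsem code.

	#include <stdatomic.h>
	#include <stdio.h>

	/* 32-bit bias layout, as in the kernel's rwsem headers (illustrative) */
	#define RWSEM_ACTIVE_MASK	 0x0000ffffL
	#define RWSEM_ACTIVE_BIAS	 0x00000001L
	#define RWSEM_WAITING_BIAS	(-0x00010000L)
	#define RWSEM_ACTIVE_READ_BIAS	 RWSEM_ACTIVE_BIAS
	#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

	static atomic_long count;	/* stands in for sem->count */

	/* Models rwsem_atomic_update(): add delta and return the new value. */
	static long update(long delta)
	{
		return atomic_fetch_add(&count, delta) + delta;
	}

	/*
	 * Grant one read lock before counting queued readers.  Returns 1 if
	 * the grant is held (safe to count and wake more readers), 0 if a
	 * writer owns the lock and the waker must bail out.
	 */
	static int try_reader_grant_model(void)
	{
		long adjustment = RWSEM_ACTIVE_READ_BIAS;
		long oldcount;

		for (;;) {
			oldcount = update(adjustment) - adjustment;
			if (oldcount >= RWSEM_WAITING_BIAS)
				return 1;	/* grant held */
			/* A writer stole the lock: undo our reader grant. */
			if (update(-adjustment) & RWSEM_ACTIVE_MASK)
				return 0;	/* writer still active */
			/* The thief already released the lock: retry. */
		}
	}

	int main(void)
	{
		/* Case 1: lock free, waiters queued -> grant succeeds. */
		atomic_store(&count, RWSEM_WAITING_BIAS);
		printf("free:   %d\n", try_reader_grant_model());

		/* Case 2: a writer stole the lock -> waker bails out. */
		atomic_store(&count, RWSEM_WAITING_BIAS + RWSEM_ACTIVE_WRITE_BIAS);
		printf("stolen: %d\n", try_reader_grant_model());
		return 0;
	}

In the actual patch the waker runs this logic while holding sem->wait_lock, so only one thread performs the grant at a time; the retry branch covers the case where the stealing writer has already released the lock, in which case the waker must grant again rather than leave the queued readers sleeping.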
Diffstat (limited to 'lib/rwsem.c')
-rw-r--r--  lib/rwsem.c  64
1 file changed, 32 insertions, 32 deletions
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 9a675fa9d78e..bbe48c04f363 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -4,6 +4,7 @@
  * Derived from arch/i386/kernel/semaphore.c
  *
  * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
+ * and Michel Lespinasse <walken@google.com>
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
@@ -41,13 +42,11 @@ struct rwsem_waiter {
 	enum rwsem_waiter_type type;
 };
 
-/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and
- * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
- * since the rwsem value was observed.
- */
-#define RWSEM_WAKE_ANY        0 /* Wake whatever's at head of wait list */
-#define RWSEM_WAKE_NO_ACTIVE  1 /* rwsem was observed with no active thread */
-#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
+enum rwsem_wake_type {
+	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
+	RWSEM_WAKE_READERS,	/* Wake readers only */
+	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
+};
 
 /*
  * handle the lock release when processes blocked on it that can now run
@@ -60,16 +59,16 @@ struct rwsem_waiter {
  * - writers are only woken if downgrading is false
  */
 static struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
+__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
 {
 	struct rwsem_waiter *waiter;
 	struct task_struct *tsk;
 	struct list_head *next;
-	signed long woken, loop, adjustment;
+	signed long oldcount, woken, loop, adjustment;
 
 	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
-		if (wake_type != RWSEM_WAKE_READ_OWNED)
+		if (wake_type == RWSEM_WAKE_ANY)
 			/* Wake writer at the front of the queue, but do not
 			 * grant it the lock yet as we want other writers
 			 * to be able to steal it. Readers, on the other hand,
@@ -79,24 +78,24 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
 		goto out;
 	}
 
-	/* If we come here from up_xxxx(), another thread might have reached
-	 * rwsem_down_failed_common() before we acquired the spinlock and
-	 * woken up a waiter, making it now active. We prefer to check for
-	 * this first in order to not spend too much time with the spinlock
-	 * held if we're not going to be able to wake up readers in the end.
-	 *
-	 * Note that we do not need to update the rwsem count: any writer
-	 * trying to acquire rwsem will run rwsem_down_write_failed() due
-	 * to the waiting threads and block trying to acquire the spinlock.
-	 *
-	 * We use a dummy atomic update in order to acquire the cache line
-	 * exclusively since we expect to succeed and run the final rwsem
-	 * count adjustment pretty soon.
+	/* Writers might steal the lock before we grant it to the next reader.
+	 * We prefer to do the first reader grant before counting readers
+	 * so we can bail out early if a writer stole the lock.
 	 */
-	if (wake_type == RWSEM_WAKE_ANY &&
-	    rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
-		/* Someone grabbed the sem for write already */
-		goto out;
+	adjustment = 0;
+	if (wake_type != RWSEM_WAKE_READ_OWNED) {
+		adjustment = RWSEM_ACTIVE_READ_BIAS;
+ try_reader_grant:
+		oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
+			/* A writer stole the lock. Undo our reader grant. */
+			if (rwsem_atomic_update(-adjustment, sem) &
+						RWSEM_ACTIVE_MASK)
+				goto out;
+			/* Last active locker left. Retry waking readers. */
+			goto try_reader_grant;
+		}
+	}
 
 	/* Grant an infinite number of read locks to the readers at the front
 	 * of the queue. Note we increment the 'active part' of the count by
@@ -114,12 +113,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
 
 	} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
 
-	adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
+	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
 	if (waiter->type != RWSEM_WAITING_FOR_WRITE)
 		/* hit end of list above */
 		adjustment -= RWSEM_WAITING_BIAS;
 
-	rwsem_atomic_add(adjustment, sem);
+	if (adjustment)
+		rwsem_atomic_add(adjustment, sem);
 
 	next = sem->wait_list.next;
 	loop = woken;
@@ -164,8 +164,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	count = rwsem_atomic_update(adjustment, sem);
 
 	/* If there are no active locks, wake the front queued process(es). */
-	if (count == RWSEM_WAITING_BIAS)
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
+	if (!(count & RWSEM_ACTIVE_MASK))
+		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
 
 	raw_spin_unlock_irq(&sem->wait_lock);
 
@@ -209,7 +209,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 	 * any read locks that were queued ahead of us. */
 	if (count > RWSEM_WAITING_BIAS &&
 	    adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
+		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
 
 	/* wait until we successfully acquire the lock */
 	set_task_state(tsk, TASK_UNINTERRUPTIBLE);