aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Michel Lespinasse <walken@google.com> 2010-08-09 20:21:17 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-08-09 23:45:10 -0400
commit: 70bdc6e0644f3535e93bac5c364ca199397e507e (patch)
tree: af5264cc57fcd69deae10fdd67cb62461111aa4c
parent: 345af7bf3304410634c21ada4664fda83d4d9a16 (diff)
rwsem: lighter active count checks when waking up readers
In __rwsem_do_wake(), we can skip the active count check unless we come there from up_xxxx(). Also when checking the active count, it is not actually necessary to increment it; this allows us to get rid of the read side undo code and simplify the calculation of the final rwsem count adjustment once we've counted the reader threads to wake.

The basic observation is the following. When there are waiter threads on a rwsem and the spinlock is held, other threads can only increment the active count by trying to grab the rwsem in down_xxxx(). However down_xxxx() will notice there are waiter threads and take the down_failed path, blocking to acquire the spinlock on the way there. Therefore, a thread observing an active count of zero with waiters queued and the spinlock held, is protected against other threads acquiring the rwsem until it wakes the last waiter or releases the spinlock.

Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Mike Waychison <mikew@google.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Ying Han <yinghan@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- lib/rwsem.c 57
1 files changed, 32 insertions, 25 deletions
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 917fd946b495..94f2d7a9dc4f 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -36,6 +36,14 @@ struct rwsem_waiter {
36#define RWSEM_WAITING_FOR_WRITE 0x00000002 36#define RWSEM_WAITING_FOR_WRITE 0x00000002
37}; 37};
38 38
39/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and
40 * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
41 * since the rwsem value was observed.
42 */
43#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */
44#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */
45#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
46
39/* 47/*
40 * handle the lock release when processes blocked on it that can now run 48 * handle the lock release when processes blocked on it that can now run
41 * - if we come here from up_xxxx(), then: 49 * - if we come here from up_xxxx(), then:
@@ -46,8 +54,8 @@ struct rwsem_waiter {
46 * - woken process blocks are discarded from the list after having task zeroed 54 * - woken process blocks are discarded from the list after having task zeroed
47 * - writers are only woken if downgrading is false 55 * - writers are only woken if downgrading is false
48 */ 56 */
49static inline struct rw_semaphore * 57static struct rw_semaphore *
50__rwsem_do_wake(struct rw_semaphore *sem, int downgrading) 58__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
51{ 59{
52 struct rwsem_waiter *waiter; 60 struct rwsem_waiter *waiter;
53 struct task_struct *tsk; 61 struct task_struct *tsk;
@@ -58,7 +66,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
58 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) 66 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
59 goto readers_only; 67 goto readers_only;
60 68
61 if (downgrading) 69 if (wake_type == RWSEM_WAKE_READ_OWNED)
62 goto out; 70 goto out;
63 71
64 /* There's a writer at the front of the queue - try to grant it the 72 /* There's a writer at the front of the queue - try to grant it the
@@ -85,19 +93,25 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
85 goto out; 93 goto out;
86 94
87 readers_only: 95 readers_only:
88 if (downgrading) 96 /* If we come here from up_xxxx(), another thread might have reached
89 goto wake_readers; 97 * rwsem_down_failed_common() before we acquired the spinlock and
90 98 * woken up a waiter, making it now active. We prefer to check for
91 /* if we came through an up_xxxx() call, we only only wake someone up 99 * this first in order to not spend too much time with the spinlock
92 * if we can transition the active part of the count from 0 -> 1 */ 100 * held if we're not going to be able to wake up readers in the end.
93 try_again_read: 101 *
94 oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem) 102 * Note that we do not need to update the rwsem count: any writer
95 - RWSEM_ACTIVE_BIAS; 103 * trying to acquire rwsem will run rwsem_down_write_failed() due
96 if (oldcount & RWSEM_ACTIVE_MASK) 104 * to the waiting threads and block trying to acquire the spinlock.
105 *
106 * We use a dummy atomic update in order to acquire the cache line
107 * exclusively since we expect to succeed and run the final rwsem
108 * count adjustment pretty soon.
109 */
110 if (wake_type == RWSEM_WAKE_ANY &&
111 (rwsem_atomic_update(0, sem) & RWSEM_ACTIVE_MASK))
97 /* Someone grabbed the sem already */ 112 /* Someone grabbed the sem already */
98 goto undo_read; 113 goto out;
99 114
100 wake_readers:
101 /* Grant an infinite number of read locks to the readers at the front 115 /* Grant an infinite number of read locks to the readers at the front
102 * of the queue. Note we increment the 'active part' of the count by 116 * of the queue. Note we increment the 'active part' of the count by
103 * the number of readers before waking any processes up. 117 * the number of readers before waking any processes up.
@@ -116,9 +130,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
116 130
117 loop = woken; 131 loop = woken;
118 woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; 132 woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS;
119 if (!downgrading)
120 /* we'd already done one increment earlier */
121 woken -= RWSEM_ACTIVE_BIAS;
122 133
123 rwsem_atomic_add(woken, sem); 134 rwsem_atomic_add(woken, sem);
124 135
@@ -145,10 +156,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
145 if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK) 156 if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK)
146 goto out; 157 goto out;
147 goto try_again_write; 158 goto try_again_write;
148 undo_read:
149 if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK)
150 goto out;
151 goto try_again_read;
152} 159}
153 160
154/* 161/*
@@ -170,12 +177,12 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
170 177
171 list_add_tail(&waiter->list, &sem->wait_list); 178 list_add_tail(&waiter->list, &sem->wait_list);
172 179
173 /* we're now waiting on the lock, but no longer actively read-locking */ 180 /* we're now waiting on the lock, but no longer actively locking */
174 count = rwsem_atomic_update(adjustment, sem); 181 count = rwsem_atomic_update(adjustment, sem);
175 182
176 /* if there are no active locks, wake the front queued process(es) up */ 183 /* if there are no active locks, wake the front queued process(es) up */
177 if (!(count & RWSEM_ACTIVE_MASK)) 184 if (!(count & RWSEM_ACTIVE_MASK))
178 sem = __rwsem_do_wake(sem, 0); 185 sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
179 186
180 spin_unlock_irq(&sem->wait_lock); 187 spin_unlock_irq(&sem->wait_lock);
181 188
@@ -232,7 +239,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
232 239
233 /* do nothing if list empty */ 240 /* do nothing if list empty */
234 if (!list_empty(&sem->wait_list)) 241 if (!list_empty(&sem->wait_list))
235 sem = __rwsem_do_wake(sem, 0); 242 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
236 243
237 spin_unlock_irqrestore(&sem->wait_lock, flags); 244 spin_unlock_irqrestore(&sem->wait_lock, flags);
238 245
@@ -252,7 +259,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
252 259
253 /* do nothing if list empty */ 260 /* do nothing if list empty */
254 if (!list_empty(&sem->wait_list)) 261 if (!list_empty(&sem->wait_list))
255 sem = __rwsem_do_wake(sem, 1); 262 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
256 263
257 spin_unlock_irqrestore(&sem->wait_lock, flags); 264 spin_unlock_irqrestore(&sem->wait_lock, flags);
258 265