Diffstat (limited to 'lib/rwsem.c')
-rw-r--r-- | lib/rwsem.c | 150 |
1 file changed, 88 insertions(+), 62 deletions(-)
diff --git a/lib/rwsem.c b/lib/rwsem.c
index ceba8e28807a..f236d7cd5cf3 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -36,45 +36,56 @@ struct rwsem_waiter {
36 | #define RWSEM_WAITING_FOR_WRITE 0x00000002 | 36 | #define RWSEM_WAITING_FOR_WRITE 0x00000002 |
37 | }; | 37 | }; |
38 | 38 | ||
39 | /* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and | ||
40 | * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held | ||
41 | * since the rwsem value was observed. | ||
42 | */ | ||
43 | #define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */ | ||
44 | #define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */ | ||
45 | #define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ | ||
46 | |||
39 | /* | 47 | /* |
40 | * handle the lock release when processes blocked on it that can now run | 48 | * handle the lock release when processes blocked on it that can now run |
41 | * - if we come here from up_xxxx(), then: | 49 | * - if we come here from up_xxxx(), then: |
42 | * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) | 50 | * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) |
43 | * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) | 51 | * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) |
44 | * - there must be someone on the queue | 52 | * - there must be someone on the queue |
45 | * - the spinlock must be held by the caller | 53 | * - the spinlock must be held by the caller |
46 | * - woken process blocks are discarded from the list after having task zeroed | 54 | * - woken process blocks are discarded from the list after having task zeroed |
47 | * - writers are only woken if downgrading is false | 55 | * - writers are only woken if downgrading is false |
48 | */ | 56 | */ |
49 | static inline struct rw_semaphore * | 57 | static struct rw_semaphore * |
50 | __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) | 58 | __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) |
51 | { | 59 | { |
52 | struct rwsem_waiter *waiter; | 60 | struct rwsem_waiter *waiter; |
53 | struct task_struct *tsk; | 61 | struct task_struct *tsk; |
54 | struct list_head *next; | 62 | struct list_head *next; |
55 | signed long oldcount, woken, loop; | 63 | signed long oldcount, woken, loop, adjustment; |
56 | |||
57 | if (downgrading) | ||
58 | goto dont_wake_writers; | ||
59 | |||
60 | /* if we came through an up_xxxx() call, we only only wake someone up | ||
61 | * if we can transition the active part of the count from 0 -> 1 | ||
62 | */ | ||
63 | try_again: | ||
64 | oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem) | ||
65 | - RWSEM_ACTIVE_BIAS; | ||
66 | if (oldcount & RWSEM_ACTIVE_MASK) | ||
67 | goto undo; | ||
68 | 64 | ||
69 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | 65 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); |
70 | |||
71 | /* try to grant a single write lock if there's a writer at the front | ||
72 | * of the queue - note we leave the 'active part' of the count | ||
73 | * incremented by 1 and the waiting part incremented by 0x00010000 | ||
74 | */ | ||
75 | if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) | 66 | if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) |
76 | goto readers_only; | 67 | goto readers_only; |
77 | 68 | ||
69 | if (wake_type == RWSEM_WAKE_READ_OWNED) | ||
70 | /* Another active reader was observed, so wakeup is not | ||
71 | * likely to succeed. Save the atomic op. | ||
72 | */ | ||
73 | goto out; | ||
74 | |||
75 | /* There's a writer at the front of the queue - try to grant it the | ||
76 | * write lock. However, we only wake this writer if we can transition | ||
77 | * the active part of the count from 0 -> 1 | ||
78 | */ | ||
79 | adjustment = RWSEM_ACTIVE_WRITE_BIAS; | ||
80 | if (waiter->list.next == &sem->wait_list) | ||
81 | adjustment -= RWSEM_WAITING_BIAS; | ||
82 | |||
83 | try_again_write: | ||
84 | oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; | ||
85 | if (oldcount & RWSEM_ACTIVE_MASK) | ||
86 | /* Someone grabbed the sem already */ | ||
87 | goto undo_write; | ||
88 | |||
78 | /* We must be careful not to touch 'waiter' after we set ->task = NULL. | 89 | /* We must be careful not to touch 'waiter' after we set ->task = NULL. |
79 | * It is allocated on the waiter's stack and may become invalid at | 90 | * It is allocated on the waiter's stack and may become invalid at |
80 | * any time after that point (due to a wakeup from another source). | 91 | * any time after that point (due to a wakeup from another source). |
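[Illustration, not part of the patch] The writer wake-up introduced above works by speculatively charging the count for the writer and backing the change out if the update raced with another acquirer. Below is a minimal userspace sketch of that adjustment arithmetic; the bias values and RWSEM_ACTIVE_WRITE_BIAS definition are assumptions mirroring the usual 32-bit rwsem layout (they live in the arch headers, not in this file), and toy_rwsem/toy_update/toy_wake_writer are illustrative names only.

#include <stdio.h>

/* Assumed values mirroring the common 32-bit rwsem layout. */
#define RWSEM_ACTIVE_BIAS        0x00000001L
#define RWSEM_ACTIVE_MASK        0x0000ffffL
#define RWSEM_WAITING_BIAS       (-0x00010000L)
#define RWSEM_ACTIVE_WRITE_BIAS  (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

struct toy_rwsem { long count; };

/* Stand-in for rwsem_atomic_update(): add delta, return the new count. */
static long toy_update(struct toy_rwsem *sem, long delta)
{
	return sem->count += delta;
}

/* Try to hand the lock to the writer at the head of the queue, as the
 * try_again_write path above does.  Returns 1 if the writer got the lock. */
static int toy_wake_writer(struct toy_rwsem *sem, int writer_is_only_waiter)
{
	long adjustment = RWSEM_ACTIVE_WRITE_BIAS;
	long oldcount;

	if (writer_is_only_waiter)
		adjustment -= RWSEM_WAITING_BIAS;	/* wait list will empty */

	oldcount = toy_update(sem, adjustment) - adjustment;
	if (oldcount & RWSEM_ACTIVE_MASK) {
		/* Someone grabbed the sem already: back out, like undo_write
		 * (minus the retry on a 1->0 transition). */
		toy_update(sem, -adjustment);
		return 0;
	}
	return 1;
}

int main(void)
{
	/* Lock is free with one queued writer: count == RWSEM_WAITING_BIAS. */
	struct toy_rwsem sem = { RWSEM_WAITING_BIAS };

	printf("writer woken: %d, count == RWSEM_ACTIVE_WRITE_BIAS: %d\n",
	       toy_wake_writer(&sem, 1), sem.count == RWSEM_ACTIVE_WRITE_BIAS);
	return 0;
}

Note the real undo_write path also checks for a 1->0 transition of the active count and retries the wake; the sketch simply gives up.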
@@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
87 | put_task_struct(tsk); | 98 | put_task_struct(tsk); |
88 | goto out; | 99 | goto out; |
89 | 100 | ||
90 | /* don't want to wake any writers */ | 101 | readers_only: |
91 | dont_wake_writers: | 102 | /* If we come here from up_xxxx(), another thread might have reached |
92 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | 103 | * rwsem_down_failed_common() before we acquired the spinlock and |
93 | if (waiter->flags & RWSEM_WAITING_FOR_WRITE) | 104 | * woken up a waiter, making it now active. We prefer to check for |
105 | * this first in order to not spend too much time with the spinlock | ||
106 | * held if we're not going to be able to wake up readers in the end. | ||
107 | * | ||
108 | * Note that we do not need to update the rwsem count: any writer | ||
109 | * trying to acquire rwsem will run rwsem_down_write_failed() due | ||
110 | * to the waiting threads and block trying to acquire the spinlock. | ||
111 | * | ||
112 | * We use a dummy atomic update in order to acquire the cache line | ||
113 | * exclusively since we expect to succeed and run the final rwsem | ||
114 | * count adjustment pretty soon. | ||
115 | */ | ||
116 | if (wake_type == RWSEM_WAKE_ANY && | ||
117 | rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) | ||
118 | /* Someone grabbed the sem for write already */ | ||
94 | goto out; | 119 | goto out; |
95 | 120 | ||
96 | /* grant an infinite number of read locks to the readers at the front | 121 | /* Grant an infinite number of read locks to the readers at the front |
97 | * of the queue | 122 | * of the queue. Note we increment the 'active part' of the count by |
98 | * - note we increment the 'active part' of the count by the number of | 123 | * the number of readers before waking any processes up. |
99 | * readers before waking any processes up | ||
100 | */ | 124 | */ |
101 | readers_only: | ||
102 | woken = 0; | 125 | woken = 0; |
103 | do { | 126 | do { |
104 | woken++; | 127 | woken++; |
@@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
111 | 134 | ||
112 | } while (waiter->flags & RWSEM_WAITING_FOR_READ); | 135 | } while (waiter->flags & RWSEM_WAITING_FOR_READ); |
113 | 136 | ||
114 | loop = woken; | 137 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS; |
115 | woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; | 138 | if (waiter->flags & RWSEM_WAITING_FOR_READ) |
116 | if (!downgrading) | 139 | /* hit end of list above */ |
117 | /* we'd already done one increment earlier */ | 140 | adjustment -= RWSEM_WAITING_BIAS; |
118 | woken -= RWSEM_ACTIVE_BIAS; | ||
119 | 141 | ||
120 | rwsem_atomic_add(woken, sem); | 142 | rwsem_atomic_add(adjustment, sem); |
121 | 143 | ||
122 | next = sem->wait_list.next; | 144 | next = sem->wait_list.next; |
123 | for (; loop > 0; loop--) { | 145 | for (loop = woken; loop > 0; loop--) { |
124 | waiter = list_entry(next, struct rwsem_waiter, list); | 146 | waiter = list_entry(next, struct rwsem_waiter, list); |
125 | next = waiter->list.next; | 147 | next = waiter->list.next; |
126 | tsk = waiter->task; | 148 | tsk = waiter->task; |
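[Illustration, not part of the patch] The readers_only path in the two hunks above does its bookkeeping in one shot: an RWSEM_WAKE_ANY caller first bails out if a writer has slipped in (any count below RWSEM_WAITING_BIAS means a writer holds the sem), then every reader at the front is granted in a single count adjustment, with the waiting bias dropped if that empties the queue. A sketch of those two calculations, with the same assumed bias values as before (RWSEM_ACTIVE_READ_BIAS is assumed equal to RWSEM_ACTIVE_BIAS; the helper names are illustrative):

#include <stdio.h>

/* Assumed values, as in the previous sketch. */
#define RWSEM_ACTIVE_BIAS       0x00000001L
#define RWSEM_ACTIVE_READ_BIAS  RWSEM_ACTIVE_BIAS
#define RWSEM_WAITING_BIAS      (-0x00010000L)

/* The RWSEM_WAKE_ANY entry check: a count below RWSEM_WAITING_BIAS means a
 * writer became active after the caller observed the sem, so waking readers
 * cannot succeed. */
static int writer_already_active(long count)
{
	return count < RWSEM_WAITING_BIAS;
}

/* Count adjustment for granting 'woken' read locks at once.  'hit_end_of_list'
 * corresponds to the wake loop above running off the end of the wait list,
 * i.e. every queued waiter was a reader and the list is about to empty. */
static long reader_grant_adjustment(long woken, int hit_end_of_list)
{
	long adjustment = woken * RWSEM_ACTIVE_READ_BIAS;

	if (hit_end_of_list)
		adjustment -= RWSEM_WAITING_BIAS;	/* no waiters left */
	return adjustment;
}

int main(void)
{
	/* Three queued readers, nothing active: count == RWSEM_WAITING_BIAS. */
	long count = RWSEM_WAITING_BIAS;

	if (!writer_already_active(count))
		count += reader_grant_adjustment(3, 1);
	printf("count after wake: %ld (three active readers)\n", count);
	return 0;
}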
@@ -138,10 +160,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
138 | 160 | ||
139 | /* undo the change to the active count, but check for a transition | 161 | /* undo the change to the active count, but check for a transition |
140 | * 1->0 */ | 162 | * 1->0 */ |
141 | undo: | 163 | undo_write: |
142 | if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK) | 164 | if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) |
143 | goto out; | 165 | goto out; |
144 | goto try_again; | 166 | goto try_again_write; |
145 | } | 167 | } |
146 | 168 | ||
147 | /* | 169 | /* |
@@ -149,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
149 | */ | 171 | */ |
150 | static struct rw_semaphore __sched * | 172 | static struct rw_semaphore __sched * |
151 | rwsem_down_failed_common(struct rw_semaphore *sem, | 173 | rwsem_down_failed_common(struct rw_semaphore *sem, |
152 | struct rwsem_waiter *waiter, signed long adjustment) | 174 | unsigned int flags, signed long adjustment) |
153 | { | 175 | { |
176 | struct rwsem_waiter waiter; | ||
154 | struct task_struct *tsk = current; | 177 | struct task_struct *tsk = current; |
155 | signed long count; | 178 | signed long count; |
156 | 179 | ||
@@ -158,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
158 | 181 | ||
159 | /* set up my own style of waitqueue */ | 182 | /* set up my own style of waitqueue */ |
160 | spin_lock_irq(&sem->wait_lock); | 183 | spin_lock_irq(&sem->wait_lock); |
161 | waiter->task = tsk; | 184 | waiter.task = tsk; |
185 | waiter.flags = flags; | ||
162 | get_task_struct(tsk); | 186 | get_task_struct(tsk); |
163 | 187 | ||
164 | list_add_tail(&waiter->list, &sem->wait_list); | 188 | if (list_empty(&sem->wait_list)) |
189 | adjustment += RWSEM_WAITING_BIAS; | ||
190 | list_add_tail(&waiter.list, &sem->wait_list); | ||
165 | 191 | ||
166 | /* we're now waiting on the lock, but no longer actively read-locking */ | 192 | /* we're now waiting on the lock, but no longer actively locking */ |
167 | count = rwsem_atomic_update(adjustment, sem); | 193 | count = rwsem_atomic_update(adjustment, sem); |
168 | 194 | ||
169 | /* if there are no active locks, wake the front queued process(es) up */ | 195 | /* If there are no active locks, wake the front queued process(es) up. |
170 | if (!(count & RWSEM_ACTIVE_MASK)) | 196 | * |
171 | sem = __rwsem_do_wake(sem, 0); | 197 | * Alternatively, if we're called from a failed down_write(), there |
198 | * were already threads queued before us and there are no active | ||
199 | * writers, the lock must be read owned; so we try to wake any read | ||
200 | * locks that were queued ahead of us. */ | ||
201 | if (count == RWSEM_WAITING_BIAS) | ||
202 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); | ||
203 | else if (count > RWSEM_WAITING_BIAS && | ||
204 | adjustment == -RWSEM_ACTIVE_WRITE_BIAS) | ||
205 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); | ||
172 | 206 | ||
173 | spin_unlock_irq(&sem->wait_lock); | 207 | spin_unlock_irq(&sem->wait_lock); |
174 | 208 | ||
175 | /* wait to be given the lock */ | 209 | /* wait to be given the lock */ |
176 | for (;;) { | 210 | for (;;) { |
177 | if (!waiter->task) | 211 | if (!waiter.task) |
178 | break; | 212 | break; |
179 | schedule(); | 213 | schedule(); |
180 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 214 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
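[Illustration, not part of the patch] The two checks near the end of the hunk above decide whether a thread that just queued itself should wake others before sleeping. Condensed into a helper under the same assumed constants (failed_path_wake_type is an illustrative name): note that adjustment == -RWSEM_ACTIVE_WRITE_BIAS can only still hold if the wait list was non-empty when we queued, since an empty list would have folded RWSEM_WAITING_BIAS into the adjustment.

#include <stdio.h>

/* Assumed values, as in the previous sketches. */
#define RWSEM_ACTIVE_BIAS        0x00000001L
#define RWSEM_WAITING_BIAS       (-0x00010000L)
#define RWSEM_ACTIVE_WRITE_BIAS  (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

#define RWSEM_WAKE_NO_ACTIVE	1
#define RWSEM_WAKE_READ_OWNED	2

/* 'count' is what the atomic update that queued us returned; 'adjustment' is
 * what we passed to it.  Returns the wake type to hand to __rwsem_do_wake(),
 * or -1 if no wake-up should be attempted. */
static int failed_path_wake_type(long count, long adjustment)
{
	/* No active lockers at all: wake the front of the queue. */
	if (count == RWSEM_WAITING_BIAS)
		return RWSEM_WAKE_NO_ACTIVE;

	/* We are a failed down_write() queued behind earlier waiters and no
	 * writer is active, so the lock must be read owned: help wake the
	 * read locks queued ahead of us, but never a writer. */
	if (count > RWSEM_WAITING_BIAS && adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
		return RWSEM_WAKE_READ_OWNED;

	return -1;
}

int main(void)
{
	/* A failed writer queues behind existing waiters while two readers
	 * hold the lock: count ends up above RWSEM_WAITING_BIAS. */
	printf("wake type: %d\n",
	       failed_path_wake_type(RWSEM_WAITING_BIAS + 2 * RWSEM_ACTIVE_BIAS,
				     -RWSEM_ACTIVE_WRITE_BIAS));
	return 0;
}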
@@ -191,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
191 | asmregparm struct rw_semaphore __sched * | 225 | asmregparm struct rw_semaphore __sched * |
192 | rwsem_down_read_failed(struct rw_semaphore *sem) | 226 | rwsem_down_read_failed(struct rw_semaphore *sem) |
193 | { | 227 | { |
194 | struct rwsem_waiter waiter; | 228 | return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, |
195 | 229 | -RWSEM_ACTIVE_READ_BIAS); | |
196 | waiter.flags = RWSEM_WAITING_FOR_READ; | ||
197 | rwsem_down_failed_common(sem, &waiter, | ||
198 | RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS); | ||
199 | return sem; | ||
200 | } | 230 | } |
201 | 231 | ||
202 | /* | 232 | /* |
@@ -205,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
205 | asmregparm struct rw_semaphore __sched * | 235 | asmregparm struct rw_semaphore __sched * |
206 | rwsem_down_write_failed(struct rw_semaphore *sem) | 236 | rwsem_down_write_failed(struct rw_semaphore *sem) |
207 | { | 237 | { |
208 | struct rwsem_waiter waiter; | 238 | return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, |
209 | 239 | -RWSEM_ACTIVE_WRITE_BIAS); | |
210 | waiter.flags = RWSEM_WAITING_FOR_WRITE; | ||
211 | rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS); | ||
212 | |||
213 | return sem; | ||
214 | } | 240 | } |
215 | 241 | ||
216 | /* | 242 | /* |
@@ -225,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
225 | 251 | ||
226 | /* do nothing if list empty */ | 252 | /* do nothing if list empty */ |
227 | if (!list_empty(&sem->wait_list)) | 253 | if (!list_empty(&sem->wait_list)) |
228 | sem = __rwsem_do_wake(sem, 0); | 254 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); |
229 | 255 | ||
230 | spin_unlock_irqrestore(&sem->wait_lock, flags); | 256 | spin_unlock_irqrestore(&sem->wait_lock, flags); |
231 | 257 | ||
@@ -245,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
245 | 271 | ||
246 | /* do nothing if list empty */ | 272 | /* do nothing if list empty */ |
247 | if (!list_empty(&sem->wait_list)) | 273 | if (!list_empty(&sem->wait_list)) |
248 | sem = __rwsem_do_wake(sem, 1); | 274 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); |
249 | 275 | ||
250 | spin_unlock_irqrestore(&sem->wait_lock, flags); | 276 | spin_unlock_irqrestore(&sem->wait_lock, flags); |
251 | 277 | ||
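[Closing illustration, not part of the patch] The adjustments now passed by rwsem_down_read_failed() and rwsem_down_write_failed() (-RWSEM_ACTIVE_READ_BIAS and -RWSEM_ACTIVE_WRITE_BIAS) take back the bias the corresponding fast path had already charged before it failed; the waiting bias itself is only added by rwsem_down_failed_common() when the waiter is the first on the list. Assuming the usual fast-path behaviour from the arch rwsem headers and the same assumed bias values as above, the arithmetic for a contended down_read() looks like this:

#include <stdio.h>

/* Assumed values, as in the earlier sketches. */
#define RWSEM_ACTIVE_BIAS        0x00000001L
#define RWSEM_ACTIVE_READ_BIAS   RWSEM_ACTIVE_BIAS
#define RWSEM_WAITING_BIAS       (-0x00010000L)
#define RWSEM_ACTIVE_WRITE_BIAS  (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

int main(void)
{
	long count = RWSEM_ACTIVE_WRITE_BIAS;	/* a writer holds the sem */

	count += RWSEM_ACTIVE_READ_BIAS;	/* down_read() fast path fails */
	/* Slow path: undo the read bias and, as the first waiter, add the
	 * single waiting bias while queuing. */
	count += -RWSEM_ACTIVE_READ_BIAS + RWSEM_WAITING_BIAS;

	printf("count == writer + waiting bias: %d\n",
	       count == RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS);
	return 0;
}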