diff options
author | Davidlohr Bueso <dave@stgolabs.net> | 2016-08-05 04:04:45 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-08-18 09:37:11 -0400 |
commit | 70800c3c0cc525baa38fd0fe4660f2c27f1bfeeb (patch) | |
tree | 8a7d1251640d4f1d644e996ba909f77fbf956ac4 | |
parent | c2867bbaf5d8f1534cae15175a389c5cbf58fec1 (diff) |
locking/rwsem: Scan the wait_list for readers only once
When wanting to wakeup readers, __rwsem_mark_wakeup() currently
iterates the wait_list twice while looking to wakeup the first N
queued reader-tasks. While this can be quite inefficient, it was
there such that a awoken reader would be first and foremost
acknowledged by the lock counter.
Keeping the same logic, we can further benefit from the use of
wake_qs and avoid entirely the first wait_list iteration that sets
the counter as wake_up_process() isn't going to occur right away,
and therefore we maintain the counter->list order of going about
things.
Other than saving cycles with O(n) "scanning", this change also
nicely cleans up a good chunk of __rwsem_mark_wakeup(); both
visually and less tedious to read.
For example, the following improvements where seen on some will
it scale microbenchmarks, on a 48-core Haswell:
v4.7 v4.7-rwsem-v1
Hmean signal1-processes-8 5792691.42 ( 0.00%) 5771971.04 ( -0.36%)
Hmean signal1-processes-12 6081199.96 ( 0.00%) 6072174.38 ( -0.15%)
Hmean signal1-processes-21 3071137.71 ( 0.00%) 3041336.72 ( -0.97%)
Hmean signal1-processes-48 3712039.98 ( 0.00%) 3708113.59 ( -0.11%)
Hmean signal1-processes-79 4464573.45 ( 0.00%) 4682798.66 ( 4.89%)
Hmean signal1-processes-110 4486842.01 ( 0.00%) 4633781.71 ( 3.27%)
Hmean signal1-processes-141 4611816.83 ( 0.00%) 4692725.38 ( 1.75%)
Hmean signal1-processes-172 4638157.05 ( 0.00%) 4714387.86 ( 1.64%)
Hmean signal1-processes-203 4465077.80 ( 0.00%) 4690348.07 ( 5.05%)
Hmean signal1-processes-224 4410433.74 ( 0.00%) 4687534.43 ( 6.28%)
Stddev signal1-processes-8 6360.47 ( 0.00%) 8455.31 ( 32.94%)
Stddev signal1-processes-12 4004.98 ( 0.00%) 9156.13 (128.62%)
Stddev signal1-processes-21 3273.14 ( 0.00%) 5016.80 ( 53.27%)
Stddev signal1-processes-48 28420.25 ( 0.00%) 26576.22 ( -6.49%)
Stddev signal1-processes-79 22038.34 ( 0.00%) 18992.70 (-13.82%)
Stddev signal1-processes-110 23226.93 ( 0.00%) 17245.79 (-25.75%)
Stddev signal1-processes-141 6358.98 ( 0.00%) 7636.14 ( 20.08%)
Stddev signal1-processes-172 9523.70 ( 0.00%) 4824.75 (-49.34%)
Stddev signal1-processes-203 13915.33 ( 0.00%) 9326.33 (-32.98%)
Stddev signal1-processes-224 15573.94 ( 0.00%) 10613.82 (-31.85%)
Other runs that saw improvements include context_switch and pipe; and
as expected, this is particularly highlighted on larger thread counts
as it becomes more expensive to walk the list twice.
No change in wakeup ordering or semantics.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: dave@stgolabs.net
Cc: jason.low2@hpe.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1470384285-32163-4-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | kernel/locking/rwsem-xadd.c | 58 |
1 files changed, 26 insertions, 32 deletions
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index e02fe3289b5a..2337b4bb2366 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
@@ -125,12 +125,14 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
125 | enum rwsem_wake_type wake_type, | 125 | enum rwsem_wake_type wake_type, |
126 | struct wake_q_head *wake_q) | 126 | struct wake_q_head *wake_q) |
127 | { | 127 | { |
128 | struct rwsem_waiter *waiter; | 128 | struct rwsem_waiter *waiter, *tmp; |
129 | struct task_struct *tsk; | 129 | long oldcount, woken = 0, adjustment = 0; |
130 | struct list_head *next; | ||
131 | long loop, oldcount, woken = 0, adjustment = 0; | ||
132 | 130 | ||
133 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | 131 | /* |
132 | * Take a peek at the queue head waiter such that we can determine | ||
133 | * the wakeup(s) to perform. | ||
134 | */ | ||
135 | waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list); | ||
134 | 136 | ||
135 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) { | 137 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) { |
136 | if (wake_type == RWSEM_WAKE_ANY) { | 138 | if (wake_type == RWSEM_WAKE_ANY) { |
@@ -180,36 +182,21 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
180 | 182 | ||
181 | /* | 183 | /* |
182 | * Grant an infinite number of read locks to the readers at the front | 184 | * Grant an infinite number of read locks to the readers at the front |
183 | * of the queue. Note we increment the 'active part' of the count by | 185 | * of the queue. We know that woken will be at least 1 as we accounted |
184 | * the number of readers before waking any processes up. | 186 | * for above. Note we increment the 'active part' of the count by the |
187 | * number of readers before waking any processes up. | ||
185 | */ | 188 | */ |
186 | do { | 189 | list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) { |
187 | woken++; | 190 | struct task_struct *tsk; |
188 | 191 | ||
189 | if (waiter->list.next == &sem->wait_list) | 192 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) |
190 | break; | 193 | break; |
191 | 194 | ||
192 | waiter = list_entry(waiter->list.next, | 195 | woken++; |
193 | struct rwsem_waiter, list); | ||
194 | |||
195 | } while (waiter->type != RWSEM_WAITING_FOR_WRITE); | ||
196 | |||
197 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; | ||
198 | if (waiter->type != RWSEM_WAITING_FOR_WRITE) | ||
199 | /* hit end of list above */ | ||
200 | adjustment -= RWSEM_WAITING_BIAS; | ||
201 | |||
202 | if (adjustment) | ||
203 | atomic_long_add(adjustment, &sem->count); | ||
204 | |||
205 | next = sem->wait_list.next; | ||
206 | loop = woken; | ||
207 | do { | ||
208 | waiter = list_entry(next, struct rwsem_waiter, list); | ||
209 | next = waiter->list.next; | ||
210 | tsk = waiter->task; | 196 | tsk = waiter->task; |
211 | 197 | ||
212 | wake_q_add(wake_q, tsk); | 198 | wake_q_add(wake_q, tsk); |
199 | list_del(&waiter->list); | ||
213 | /* | 200 | /* |
214 | * Ensure that the last operation is setting the reader | 201 | * Ensure that the last operation is setting the reader |
215 | * waiter to nil such that rwsem_down_read_failed() cannot | 202 | * waiter to nil such that rwsem_down_read_failed() cannot |
@@ -217,10 +204,16 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
217 | * to the task to wakeup. | 204 | * to the task to wakeup. |
218 | */ | 205 | */ |
219 | smp_store_release(&waiter->task, NULL); | 206 | smp_store_release(&waiter->task, NULL); |
220 | } while (--loop); | 207 | } |
221 | 208 | ||
222 | sem->wait_list.next = next; | 209 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; |
223 | next->prev = &sem->wait_list; | 210 | if (list_empty(&sem->wait_list)) { |
211 | /* hit end of list above */ | ||
212 | adjustment -= RWSEM_WAITING_BIAS; | ||
213 | } | ||
214 | |||
215 | if (adjustment) | ||
216 | atomic_long_add(adjustment, &sem->count); | ||
224 | } | 217 | } |
225 | 218 | ||
226 | /* | 219 | /* |
@@ -245,7 +238,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | |||
245 | /* we're now waiting on the lock, but no longer actively locking */ | 238 | /* we're now waiting on the lock, but no longer actively locking */ |
246 | count = atomic_long_add_return(adjustment, &sem->count); | 239 | count = atomic_long_add_return(adjustment, &sem->count); |
247 | 240 | ||
248 | /* If there are no active locks, wake the front queued process(es). | 241 | /* |
242 | * If there are no active locks, wake the front queued process(es). | ||
249 | * | 243 | * |
250 | * If there are no writers and we are first in the queue, | 244 | * If there are no writers and we are first in the queue, |
251 | * wake our own waiter to join the existing active readers ! | 245 | * wake our own waiter to join the existing active readers ! |