diff options
| author | Davidlohr Bueso <dave@stgolabs.net> | 2016-08-05 04:04:45 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2016-08-18 09:37:11 -0400 |
| commit | 70800c3c0cc525baa38fd0fe4660f2c27f1bfeeb (patch) | |
| tree | 8a7d1251640d4f1d644e996ba909f77fbf956ac4 /kernel/locking | |
| parent | c2867bbaf5d8f1534cae15175a389c5cbf58fec1 (diff) | |
locking/rwsem: Scan the wait_list for readers only once
When wanting to wakeup readers, __rwsem_mark_wakeup() currently
iterates the wait_list twice while looking to wakeup the first N
queued reader-tasks. While this can be quite inefficient, it was
there such that a awoken reader would be first and foremost
acknowledged by the lock counter.
Keeping the same logic, we can further benefit from the use of
wake_qs and avoid entirely the first wait_list iteration that sets
the counter as wake_up_process() isn't going to occur right away,
and therefore we maintain the counter->list order of going about
things.
Other than saving cycles with O(n) "scanning", this change also
nicely cleans up a good chunk of __rwsem_mark_wakeup(); both
visually and less tedious to read.
For example, the following improvements where seen on some will
it scale microbenchmarks, on a 48-core Haswell:
v4.7 v4.7-rwsem-v1
Hmean signal1-processes-8 5792691.42 ( 0.00%) 5771971.04 ( -0.36%)
Hmean signal1-processes-12 6081199.96 ( 0.00%) 6072174.38 ( -0.15%)
Hmean signal1-processes-21 3071137.71 ( 0.00%) 3041336.72 ( -0.97%)
Hmean signal1-processes-48 3712039.98 ( 0.00%) 3708113.59 ( -0.11%)
Hmean signal1-processes-79 4464573.45 ( 0.00%) 4682798.66 ( 4.89%)
Hmean signal1-processes-110 4486842.01 ( 0.00%) 4633781.71 ( 3.27%)
Hmean signal1-processes-141 4611816.83 ( 0.00%) 4692725.38 ( 1.75%)
Hmean signal1-processes-172 4638157.05 ( 0.00%) 4714387.86 ( 1.64%)
Hmean signal1-processes-203 4465077.80 ( 0.00%) 4690348.07 ( 5.05%)
Hmean signal1-processes-224 4410433.74 ( 0.00%) 4687534.43 ( 6.28%)
Stddev signal1-processes-8 6360.47 ( 0.00%) 8455.31 ( 32.94%)
Stddev signal1-processes-12 4004.98 ( 0.00%) 9156.13 (128.62%)
Stddev signal1-processes-21 3273.14 ( 0.00%) 5016.80 ( 53.27%)
Stddev signal1-processes-48 28420.25 ( 0.00%) 26576.22 ( -6.49%)
Stddev signal1-processes-79 22038.34 ( 0.00%) 18992.70 (-13.82%)
Stddev signal1-processes-110 23226.93 ( 0.00%) 17245.79 (-25.75%)
Stddev signal1-processes-141 6358.98 ( 0.00%) 7636.14 ( 20.08%)
Stddev signal1-processes-172 9523.70 ( 0.00%) 4824.75 (-49.34%)
Stddev signal1-processes-203 13915.33 ( 0.00%) 9326.33 (-32.98%)
Stddev signal1-processes-224 15573.94 ( 0.00%) 10613.82 (-31.85%)
Other runs that saw improvements include context_switch and pipe; and
as expected, this is particularly highlighted on larger thread counts
as it becomes more expensive to walk the list twice.
No change in wakeup ordering or semantics.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: dave@stgolabs.net
Cc: jason.low2@hpe.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1470384285-32163-4-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/locking')
| -rw-r--r-- | kernel/locking/rwsem-xadd.c | 58 |
1 files changed, 26 insertions, 32 deletions
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index e02fe3289b5a..2337b4bb2366 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
| @@ -125,12 +125,14 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
| 125 | enum rwsem_wake_type wake_type, | 125 | enum rwsem_wake_type wake_type, |
| 126 | struct wake_q_head *wake_q) | 126 | struct wake_q_head *wake_q) |
| 127 | { | 127 | { |
| 128 | struct rwsem_waiter *waiter; | 128 | struct rwsem_waiter *waiter, *tmp; |
| 129 | struct task_struct *tsk; | 129 | long oldcount, woken = 0, adjustment = 0; |
| 130 | struct list_head *next; | ||
| 131 | long loop, oldcount, woken = 0, adjustment = 0; | ||
| 132 | 130 | ||
| 133 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | 131 | /* |
| 132 | * Take a peek at the queue head waiter such that we can determine | ||
| 133 | * the wakeup(s) to perform. | ||
| 134 | */ | ||
| 135 | waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list); | ||
| 134 | 136 | ||
| 135 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) { | 137 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) { |
| 136 | if (wake_type == RWSEM_WAKE_ANY) { | 138 | if (wake_type == RWSEM_WAKE_ANY) { |
| @@ -180,36 +182,21 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
| 180 | 182 | ||
| 181 | /* | 183 | /* |
| 182 | * Grant an infinite number of read locks to the readers at the front | 184 | * Grant an infinite number of read locks to the readers at the front |
| 183 | * of the queue. Note we increment the 'active part' of the count by | 185 | * of the queue. We know that woken will be at least 1 as we accounted |
| 184 | * the number of readers before waking any processes up. | 186 | * for above. Note we increment the 'active part' of the count by the |
| 187 | * number of readers before waking any processes up. | ||
| 185 | */ | 188 | */ |
| 186 | do { | 189 | list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) { |
| 187 | woken++; | 190 | struct task_struct *tsk; |
| 188 | 191 | ||
| 189 | if (waiter->list.next == &sem->wait_list) | 192 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) |
| 190 | break; | 193 | break; |
| 191 | 194 | ||
| 192 | waiter = list_entry(waiter->list.next, | 195 | woken++; |
| 193 | struct rwsem_waiter, list); | ||
| 194 | |||
| 195 | } while (waiter->type != RWSEM_WAITING_FOR_WRITE); | ||
| 196 | |||
| 197 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; | ||
| 198 | if (waiter->type != RWSEM_WAITING_FOR_WRITE) | ||
| 199 | /* hit end of list above */ | ||
| 200 | adjustment -= RWSEM_WAITING_BIAS; | ||
| 201 | |||
| 202 | if (adjustment) | ||
| 203 | atomic_long_add(adjustment, &sem->count); | ||
| 204 | |||
| 205 | next = sem->wait_list.next; | ||
| 206 | loop = woken; | ||
| 207 | do { | ||
| 208 | waiter = list_entry(next, struct rwsem_waiter, list); | ||
| 209 | next = waiter->list.next; | ||
| 210 | tsk = waiter->task; | 196 | tsk = waiter->task; |
| 211 | 197 | ||
| 212 | wake_q_add(wake_q, tsk); | 198 | wake_q_add(wake_q, tsk); |
| 199 | list_del(&waiter->list); | ||
| 213 | /* | 200 | /* |
| 214 | * Ensure that the last operation is setting the reader | 201 | * Ensure that the last operation is setting the reader |
| 215 | * waiter to nil such that rwsem_down_read_failed() cannot | 202 | * waiter to nil such that rwsem_down_read_failed() cannot |
| @@ -217,10 +204,16 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
| 217 | * to the task to wakeup. | 204 | * to the task to wakeup. |
| 218 | */ | 205 | */ |
| 219 | smp_store_release(&waiter->task, NULL); | 206 | smp_store_release(&waiter->task, NULL); |
| 220 | } while (--loop); | 207 | } |
| 221 | 208 | ||
| 222 | sem->wait_list.next = next; | 209 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; |
| 223 | next->prev = &sem->wait_list; | 210 | if (list_empty(&sem->wait_list)) { |
| 211 | /* hit end of list above */ | ||
| 212 | adjustment -= RWSEM_WAITING_BIAS; | ||
| 213 | } | ||
| 214 | |||
| 215 | if (adjustment) | ||
| 216 | atomic_long_add(adjustment, &sem->count); | ||
| 224 | } | 217 | } |
| 225 | 218 | ||
| 226 | /* | 219 | /* |
| @@ -245,7 +238,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | |||
| 245 | /* we're now waiting on the lock, but no longer actively locking */ | 238 | /* we're now waiting on the lock, but no longer actively locking */ |
| 246 | count = atomic_long_add_return(adjustment, &sem->count); | 239 | count = atomic_long_add_return(adjustment, &sem->count); |
| 247 | 240 | ||
| 248 | /* If there are no active locks, wake the front queued process(es). | 241 | /* |
| 242 | * If there are no active locks, wake the front queued process(es). | ||
| 249 | * | 243 | * |
| 250 | * If there are no writers and we are first in the queue, | 244 | * If there are no writers and we are first in the queue, |
| 251 | * wake our own waiter to join the existing active readers ! | 245 | * wake our own waiter to join the existing active readers ! |
