author     Waiman Long <Waiman.Long@hpe.com>   2016-05-17 21:26:19 -0400
committer  Ingo Molnar <mingo@kernel.org>      2016-06-08 09:16:59 -0400
commit     19c5d690e41697fcdd19379ab9d10d8d37818414
tree       6212929f726d890a738b80932903f690c6faad4b
parent     d157bd860f1c828593730dca594d0ce51956833b
locking/rwsem: Add reader-owned state to the owner field
Currently, it is not possible to determine for certain whether a reader
owns a rwsem by looking at the content of the rwsem data structure. This
patch adds a new state, RWSEM_READER_OWNED, to the owner field to
indicate that readers currently own the lock. This enables us to address
the following two issues in the rwsem optimistic spinning code:

 1) rwsem_can_spin_on_owner() disallows optimistic spinning if the
    owner field is NULL, which can mean either that readers own the
    lock or that the owning writer hasn't set the owner field yet.
    In the latter case, we miss the chance to do optimistic spinning.

 2) While a writer is waiting in the OSQ and a reader takes the lock,
    the writer will continue to spin after leaving the OSQ in the main
    rwsem_optimistic_spin() loop because the owner field is NULL,
    wasting CPU cycles if some of the readers are sleeping.

Adding the new state allows optimistic spinning to go forward as long
as the owner field is not RWSEM_READER_OWNED and the owner, if set, is
running, but to stop immediately once that state has been reached.

On a 4-socket Haswell machine running a 4.6-rc1 based kernel, fio tests
with multithreaded randrw and randwrite workloads were run on the same
file on an XFS partition on top of an NVDIMM. The aggregated bandwidths
before and after the patch were as follows:

  Test       BW before patch   BW after patch   % change
  ----       ---------------   --------------   --------
  randrw     988 MB/s          1192 MB/s        +21%
  randwrite  1513 MB/s         1623 MB/s        +7.3%

The perf profiles of the rwsem_down_write_failed() function in the
randrw test before and after the patch were:

  19.95%  5.88%  fio  [kernel.vmlinux]  [k] rwsem_down_write_failed
  14.20%  1.52%  fio  [kernel.vmlinux]  [k] rwsem_down_write_failed

The actual CPU cycles spent in rwsem_down_write_failed() dropped from
5.88% to 1.52% after the patch. The xfstests suite was also run and no
regression was observed.

Signed-off-by: Waiman Long <Waiman.Long@hpe.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Jason Low <jason.low2@hp.com>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Douglas Hatch <doug.hatch@hpe.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hpe.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1463534783-38814-2-git-send-email-Waiman.Long@hpe.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 kernel/locking/rwsem-xadd.c | 41
 kernel/locking/rwsem.c      |  8
 kernel/locking/rwsem.h      | 41
 3 files changed, 69 insertions(+), 21 deletions(-)
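
As background for the diffs below: the changelog describes an owner
field that encodes three states in a single pointer. The following
user-space sketch is illustrative only and not part of the patch
(task_struct is an opaque stub here); it shows how one pointer can
carry all three states:

  #include <stdio.h>

  struct task_struct;                          /* opaque stand-in */
  #define RWSEM_READER_OWNED ((struct task_struct *)1UL)

  static const char *owner_state(struct task_struct *owner)
  {
          if (!owner)
                  return "free, or owner not recorded yet";
          if (owner == RWSEM_READER_OWNED)
                  return "currently or previously reader-owned";
          return "writer-owned";
  }

  int main(void)
  {
          struct task_struct *writer = (struct task_struct *)0x1000;

          printf("%s\n", owner_state(NULL));               /* state 1 */
          printf("%s\n", owner_state(RWSEM_READER_OWNED)); /* state 2 */
          printf("%s\n", owner_state(writer));             /* state 3 */
          return 0;
  }

The sentinel works because (struct task_struct *)1UL can never collide
with the address of a real, suitably aligned task_struct.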
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 63b40a5c62ec..6b0d0605910e 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -163,6 +163,12 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 			/* Last active locker left. Retry waking readers. */
 			goto try_reader_grant;
 		}
+		/*
+		 * It is not really necessary to set it to reader-owned here,
+		 * but it gives the spinners an early indication that the
+		 * readers now have the lock.
+		 */
+		rwsem_set_reader_owned(sem);
 	}
 
 	/* Grant an infinite number of read locks to the readers at the front
@@ -325,16 +331,11 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 
 	rcu_read_lock();
 	owner = READ_ONCE(sem->owner);
-	if (!owner) {
-		long count = atomic_long_read(&sem->count);
+	if (!rwsem_owner_is_writer(owner)) {
 		/*
-		 * If sem->owner is not set, yet we have just recently entered the
-		 * slowpath with the lock being active, then there is a possibility
-		 * reader(s) may have the lock. To be safe, bail spinning in these
-		 * situations.
+		 * Don't spin if the rwsem is reader-owned.
 		 */
-		if (count & RWSEM_ACTIVE_MASK)
-			ret = false;
+		ret = !rwsem_owner_is_reader(owner);
 		goto done;
 	}
 
@@ -347,8 +348,6 @@ done:
 static noinline
 bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
 {
-	long count;
-
 	rcu_read_lock();
 	while (sem->owner == owner) {
 		/*
@@ -369,16 +368,11 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
 	}
 	rcu_read_unlock();
 
-	if (READ_ONCE(sem->owner))
-		return true; /* new owner, continue spinning */
-
 	/*
-	 * When the owner is not set, the lock could be free or
-	 * held by readers. Check the counter to verify the
-	 * state.
+	 * If there is a new owner or the owner is not set, we continue
+	 * spinning.
 	 */
-	count = atomic_long_read(&sem->count);
-	return (count == 0 || count == RWSEM_WAITING_BIAS);
+	return !rwsem_owner_is_reader(READ_ONCE(sem->owner));
 }
 
 static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
@@ -397,7 +391,16 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
 
 	while (true) {
 		owner = READ_ONCE(sem->owner);
-		if (owner && !rwsem_spin_on_owner(sem, owner))
+		/*
+		 * Don't spin if
+		 * 1) the owner is a reader, as we can't determine if the
+		 *    reader is actively running or not.
+		 * 2) rwsem_spin_on_owner() returns false, which means
+		 *    the owner isn't running.
+		 */
+		if (rwsem_owner_is_reader(owner) ||
+		    (rwsem_owner_is_writer(owner) &&
+		    !rwsem_spin_on_owner(sem, owner)))
 			break;
 
 		/* wait_lock will be acquired if write_lock is obtained */
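
The net effect of the hunks above on the spinning policy can be
summarized as a pure predicate. This is an illustrative extraction,
not kernel code: keep_spinning() and the owner_running flag (standing
in for the result of rwsem_spin_on_owner()) are hypothetical names:

  #include <stdbool.h>

  struct task_struct;
  #define RWSEM_READER_OWNED ((struct task_struct *)1UL)

  static bool rwsem_owner_is_writer(struct task_struct *owner)
  {
          return owner && owner != RWSEM_READER_OWNED;
  }

  static bool rwsem_owner_is_reader(struct task_struct *owner)
  {
          return owner == RWSEM_READER_OWNED;
  }

  static bool keep_spinning(struct task_struct *owner, bool owner_running)
  {
          if (rwsem_owner_is_reader(owner))
                  return false;   /* readers own it: stop immediately */
          if (rwsem_owner_is_writer(owner) && !owner_running)
                  return false;   /* owning writer is off the CPU */
          return true;            /* NULL owner now keeps us spinning,
                                     which is the behavioural change */
  }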
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 2e853ad93a3a..45ba475d4be3 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -22,6 +22,7 @@ void __sched down_read(struct rw_semaphore *sem)
 	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
+	rwsem_set_reader_owned(sem);
 }
 
 EXPORT_SYMBOL(down_read);
@@ -33,8 +34,10 @@ int down_read_trylock(struct rw_semaphore *sem)
 {
 	int ret = __down_read_trylock(sem);
 
-	if (ret == 1)
+	if (ret == 1) {
 		rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
+		rwsem_set_reader_owned(sem);
+	}
 	return ret;
 }
 
@@ -124,7 +127,7 @@ void downgrade_write(struct rw_semaphore *sem)
 	 * lockdep: a downgraded write will live on as a write
 	 * dependency.
 	 */
-	rwsem_clear_owner(sem);
+	rwsem_set_reader_owned(sem);
 	__downgrade_write(sem);
 }
 
@@ -138,6 +141,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
 	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
+	rwsem_set_reader_owned(sem);
 }
 
 EXPORT_SYMBOL(down_read_nested);
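
Taken together, the call sites above define the owner-field lifecycle:
reader acquisitions and downgrade_write() mark the lock reader-owned,
while reader unlocks leave the field untouched, which is why a stale
RWSEM_READER_OWNED value can persist on a free lock. A small user-space
trace of that lifecycle, under the assumption that these model_*
helpers faithfully mirror the corresponding kernel paths:

  #include <stdio.h>

  struct task_struct;
  #define RWSEM_READER_OWNED ((struct task_struct *)1UL)

  struct rwsem_model { struct task_struct *owner; };

  static void model_down_write(struct rwsem_model *s, struct task_struct *me)
  {
          s->owner = me;                  /* rwsem_set_owner()          */
  }

  static void model_downgrade_write(struct rwsem_model *s)
  {
          s->owner = RWSEM_READER_OWNED;  /* rwsem_set_reader_owned()   */
  }

  static void model_up_read(struct rwsem_model *s)
  {
          (void)s;                        /* readers never touch owner  */
  }

  static void model_up_write(struct rwsem_model *s)
  {
          s->owner = NULL;                /* rwsem_clear_owner()        */
  }

  int main(void)
  {
          struct rwsem_model sem = { NULL };
          struct task_struct *me = (struct task_struct *)0x1000;

          model_down_write(&sem, me);     /* owner = task pointer       */
          model_downgrade_write(&sem);    /* owner = RWSEM_READER_OWNED */
          model_up_read(&sem);            /* stale reader-owned value stays */
          printf("after reader unlock: %p\n", (void *)sem.owner);

          model_down_write(&sem, me);
          model_up_write(&sem);           /* writers clear owner on unlock */
          printf("after writer unlock: %p\n", (void *)sem.owner);
          return 0;
  }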
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index 870ed9a5b426..8f43ba234787 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,3 +1,20 @@
+/*
+ * The owner field of the rw_semaphore structure will be set to
+ * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear
+ * the owner field when it unlocks. A reader, on the other hand, will
+ * not touch the owner field when it unlocks.
+ *
+ * In essence, the owner field now has the following 3 states:
+ *  1) 0
+ *     - lock is free or the owner hasn't set the field yet
+ *  2) RWSEM_READER_OWNED
+ *     - lock is currently or previously owned by readers (the lock may
+ *       in fact be free, or a new owner may not have set the field yet)
+ *  3) Other non-zero value
+ *     - a writer owns the lock
+ */
+#define RWSEM_READER_OWNED	((struct task_struct *)1UL)
+
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
 {
@@ -9,6 +26,26 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem)
 	sem->owner = NULL;
 }
 
+static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
+{
+	/*
+	 * We check the owner value first to make sure that we will only
+	 * do a write to the rwsem cacheline when it is really necessary
+	 * to minimize cacheline contention.
+	 */
+	if (sem->owner != RWSEM_READER_OWNED)
+		sem->owner = RWSEM_READER_OWNED;
+}
+
+static inline bool rwsem_owner_is_writer(struct task_struct *owner)
+{
+	return owner && owner != RWSEM_READER_OWNED;
+}
+
+static inline bool rwsem_owner_is_reader(struct task_struct *owner)
+{
+	return owner == RWSEM_READER_OWNED;
+}
 #else
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
 {
@@ -17,4 +54,8 @@ static inline void rwsem_set_owner(struct rw_semaphore *sem)
 static inline void rwsem_clear_owner(struct rw_semaphore *sem)
 {
 }
+
+static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
+{
+}
 #endif
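
A note on the design choice in rwsem_set_reader_owned() above: testing
before storing is a common pattern for minimizing writes to shared
data. A generic sketch of the same pattern (a hypothetical helper, not
from the patch):

  static void set_flag_once(int *flag)
  {
          if (*flag != 1)         /* load: cheap while the line is shared */
                  *flag = 1;      /* store: only on a real transition */
  }

Repeated reads leave the cacheline in a shared state across CPUs; an
unconditional store would force every reader-acquirer to take the line
exclusive, bouncing it between CPUs for no change in value.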