diff options
| author | Waiman Long <Waiman.Long@hp.com> | 2015-06-09 11:19:13 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2015-06-19 03:45:38 -0400 |
| commit | 405963b6a57c60040bc1dad2597f7f4b897954d1 (patch) | |
| tree | 960acc0117e6c8960456ed3227bc313a504d5b32 /kernel/locking | |
| parent | 92ae18371cb1abb4e186dd9d48de2bb0d9bba626 (diff) | |
locking/qrwlock: Don't contend with readers when setting _QW_WAITING
The current cmpxchg() loop that sets the _QW_WAITING flag for writers
in queue_write_lock_slowpath() contends with incoming readers, which
can cause extra, wasteful cmpxchg() operations. This patch changes the
code to do a byte-sized cmpxchg() on the writer-mode byte only, which
eliminates contention with new readers.
A multithreaded microbenchmark running 5M read_lock/write_lock loops
on an 8-socket 80-core Westmere-EX machine running a 4.0-based kernel
with the qspinlock patch had the following execution times (in ms)
with and without the patch:
With R:W ratio = 5:1
Threads w/o patch with patch % change
------- --------- ---------- --------
2 990 895 -9.6%
3 2136 1912 -10.5%
4 3166 2830 -10.6%
5 3953 3629 -8.2%
6 4628 4405 -4.8%
7 5344 5197 -2.8%
8 6065 6004 -1.0%
9 6826 6811 -0.2%
10 7599 7599 0.0%
15 9757 9766 +0.1%
20 13767 13817 +0.4%
With a small number of contending threads, this patch can improve
locking performance by up to 10%. With more contending threads,
however, the gain diminishes.
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1433863153-30722-3-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/locking')
| -rw-r--r-- | kernel/locking/qrwlock.c | 28 |
1 files changed, 24 insertions, 4 deletions
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index 00c12bb390b5..6c5da483966b 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c | |||
| @@ -22,6 +22,26 @@ | |||
| 22 | #include <linux/hardirq.h> | 22 | #include <linux/hardirq.h> |
| 23 | #include <asm/qrwlock.h> | 23 | #include <asm/qrwlock.h> |
| 24 | 24 | ||
| 25 | /* | ||
| 26 | * This internal data structure is used for optimizing access to some of | ||
| 27 | * the subfields within the atomic_t cnts. | ||
| 28 | */ | ||
| 29 | struct __qrwlock { | ||
| 30 | union { | ||
| 31 | atomic_t cnts; | ||
| 32 | struct { | ||
| 33 | #ifdef __LITTLE_ENDIAN | ||
| 34 | u8 wmode; /* Writer mode */ | ||
| 35 | u8 rcnts[3]; /* Reader counts */ | ||
| 36 | #else | ||
| 37 | u8 rcnts[3]; /* Reader counts */ | ||
| 38 | u8 wmode; /* Writer mode */ | ||
| 39 | #endif | ||
| 40 | }; | ||
| 41 | }; | ||
| 42 | arch_spinlock_t lock; | ||
| 43 | }; | ||
| 44 | |||
| 25 | /** | 45 | /** |
| 26 | * rspin_until_writer_unlock - inc reader count & spin until writer is gone | 46 | * rspin_until_writer_unlock - inc reader count & spin until writer is gone |
| 27 | * @lock : Pointer to queue rwlock structure | 47 | * @lock : Pointer to queue rwlock structure |
| @@ -107,10 +127,10 @@ void queue_write_lock_slowpath(struct qrwlock *lock) | |||
| 107 | * or wait for a previous writer to go away. | 127 | * or wait for a previous writer to go away. |
| 108 | */ | 128 | */ |
| 109 | for (;;) { | 129 | for (;;) { |
| 110 | cnts = atomic_read(&lock->cnts); | 130 | struct __qrwlock *l = (struct __qrwlock *)lock; |
| 111 | if (!(cnts & _QW_WMASK) && | 131 | |
| 112 | (atomic_cmpxchg(&lock->cnts, cnts, | 132 | if (!READ_ONCE(l->wmode) && |
| 113 | cnts | _QW_WAITING) == cnts)) | 133 | (cmpxchg(&l->wmode, 0, _QW_WAITING) == 0)) |
| 114 | break; | 134 | break; |
| 115 | 135 | ||
| 116 | cpu_relax_lowlatency(); | 136 | cpu_relax_lowlatency(); |
