author | Will Deacon <will.deacon@arm.com> | 2015-08-06 12:54:42 -0400
committer | Ingo Molnar <mingo@kernel.org> | 2015-08-12 05:59:06 -0400
commit | 77e430e3e45662b696dc49aa53ea0f7ac63f2574
tree | 1fae9dab1524de8b3ca2516228ff76b809626974
parent | 2b2a85a4d3534b8884fcfa5bb52837f0e1c672bc
locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics
The qrwlock implementation is slightly heavy in its use of memory
barriers, mainly through the use of _cmpxchg() and _return() atomics, which
imply full barrier semantics.
This patch modifies the qrwlock code to use the more relaxed atomic
routines so that we can reduce the unnecessary barrier overhead on
weakly-ordered architectures.
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-7-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
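
As background for the change, here is a minimal user-space sketch of the idea, written against C11 <stdatomic.h> rather than the kernel's atomic API; the names (toy_qrwlock, toy_write_trylock, QW_LOCKED) are hypothetical stand-ins for the qrwlock internals. Taking a lock only needs ACQUIRE ordering, so a fully-ordered compare-and-swap (what the kernel's atomic_cmpxchg() implies, and the C11 default) can be relaxed to an acquire one:

```c
#include <stdatomic.h>
#include <stdbool.h>

#define QW_LOCKED 0x100u        /* hypothetical "writer holds the lock" bit */

struct toy_qrwlock {
        atomic_uint cnts;       /* reader count + writer bits, as in qrwlock */
};

/*
 * Write-trylock fast path, loosely modelled on queued_write_trylock().
 * The CAS that takes the lock only needs ACQUIRE ordering: accesses inside
 * the critical section must not be hoisted above the point where the lock
 * is observed to be held. Sequentially-consistent ordering (the C11
 * default) is stronger than required.
 */
static bool toy_write_trylock(struct toy_qrwlock *lock)
{
        unsigned int expected = 0;

        return atomic_compare_exchange_strong_explicit(&lock->cnts, &expected,
                                                       QW_LOCKED,
                                                       memory_order_acquire,
                                                       memory_order_relaxed);
}
```

On a strongly-ordered machine such as x86 the two orderings typically compile to the same locked instruction; the saving shows up on weakly-ordered architectures, which is the commit's stated motivation.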
include/asm-generic/qrwlock.h | 13
kernel/locking/qrwlock.c | 24
2 files changed, 18 insertions, 19 deletions
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index eb673dde8879..54a8e65e18b6 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -68,7 +68,7 @@ static inline int queued_read_trylock(struct qrwlock *lock)
 
         cnts = atomic_read(&lock->cnts);
         if (likely(!(cnts & _QW_WMASK))) {
-                cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
+                cnts = (u32)atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
                 if (likely(!(cnts & _QW_WMASK)))
                         return 1;
                 atomic_sub(_QR_BIAS, &lock->cnts);
@@ -89,8 +89,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
         if (unlikely(cnts))
                 return 0;
 
-        return likely(atomic_cmpxchg(&lock->cnts,
-                                     cnts, cnts | _QW_LOCKED) == cnts);
+        return likely(atomic_cmpxchg_acquire(&lock->cnts,
+                                             cnts, cnts | _QW_LOCKED) == cnts);
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -100,7 +100,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 {
         u32 cnts;
 
-        cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
+        cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
         if (likely(!(cnts & _QW_WMASK)))
                 return;
 
@@ -115,7 +115,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 static inline void queued_write_lock(struct qrwlock *lock)
 {
         /* Optimize for the unfair lock case where the fair flag is 0. */
-        if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
+        if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
                 return;
 
         queued_write_lock_slowpath(lock);
@@ -130,8 +130,7 @@ static inline void queued_read_unlock(struct qrwlock *lock)
         /*
          * Atomically decrement the reader count
          */
-        smp_mb__before_atomic();
-        atomic_sub(_QR_BIAS, &lock->cnts);
+        (void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
 }
 
 /**
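
The last hunk above replaces the smp_mb__before_atomic()/atomic_sub() pair in queued_read_unlock() with a single decrement carrying RELEASE semantics. A rough user-space analogue, again in C11 with hypothetical names (toy_qrwlock, QR_BIAS), not the kernel API:

```c
#include <stdatomic.h>

#define QR_BIAS 1u      /* hypothetical per-reader increment */

struct toy_qrwlock {
        atomic_uint cnts;
};

/*
 * Reader unlock, loosely modelled on queued_read_unlock(). RELEASE ordering
 * keeps the read-side critical section from sinking below the decrement,
 * which is all the unlock has to guarantee; the old full barrier provided by
 * smp_mb__before_atomic() was stronger than necessary.
 */
static void toy_read_unlock(struct toy_qrwlock *lock)
{
        (void)atomic_fetch_sub_explicit(&lock->cnts, QR_BIAS,
                                        memory_order_release);
}
```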
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 6a7a3b8d5ac9..f17a3e3b3550 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -55,7 +55,7 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
 {
         while ((cnts & _QW_WMASK) == _QW_LOCKED) {
                 cpu_relax_lowlatency();
-                cnts = smp_load_acquire((u32 *)&lock->cnts);
+                cnts = atomic_read_acquire(&lock->cnts);
         }
 }
 
@@ -74,8 +74,9 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
                  * Readers in interrupt context will get the lock immediately
                  * if the writer is just waiting (not holding the lock yet).
                  * The rspin_until_writer_unlock() function returns immediately
-                 * in this case. Otherwise, they will spin until the lock
-                 * is available without waiting in the queue.
+                 * in this case. Otherwise, they will spin (with ACQUIRE
+                 * semantics) until the lock is available without waiting in
+                 * the queue.
                  */
                 rspin_until_writer_unlock(lock, cnts);
                 return;
@@ -88,12 +89,11 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
         arch_spin_lock(&lock->lock);
 
         /*
-         * At the head of the wait queue now, increment the reader count
-         * and wait until the writer, if it has the lock, has gone away.
-         * At ths stage, it is not possible for a writer to remain in the
-         * waiting state (_QW_WAITING). So there won't be any deadlock.
+         * The ACQUIRE semantics of the following spinning code ensure
+         * that accesses can't leak upwards out of our subsequent critical
+         * section in the case that the lock is currently held for write.
          */
-        cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+        cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts) - _QR_BIAS;
         rspin_until_writer_unlock(lock, cnts);
 
         /*
@@ -116,7 +116,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 
         /* Try to acquire the lock directly if no reader is present */
         if (!atomic_read(&lock->cnts) &&
-            (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
+            (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
                 goto unlock;
 
         /*
@@ -127,7 +127,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
                 struct __qrwlock *l = (struct __qrwlock *)lock;
 
                 if (!READ_ONCE(l->wmode) &&
-                    (cmpxchg(&l->wmode, 0, _QW_WAITING) == 0))
+                    (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
                         break;
 
                 cpu_relax_lowlatency();
@@ -137,8 +137,8 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
         for (;;) {
                 cnts = atomic_read(&lock->cnts);
                 if ((cnts == _QW_WAITING) &&
-                    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
-                                    _QW_LOCKED) == _QW_WAITING))
+                    (atomic_cmpxchg_acquire(&lock->cnts, _QW_WAITING,
+                                            _QW_LOCKED) == _QW_WAITING))
                         break;
 
                 cpu_relax_lowlatency();
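
Finally, the cmpxchg() that merely advertises a waiting writer is made fully relaxed, because the ordering that protects the critical section is supplied by the later acquire cmpxchg that upgrades _QW_WAITING to _QW_LOCKED. A simplified C11 sketch of that two-step pattern (hypothetical names, and the kernel's separate wmode byte collapsed into the whole word for brevity):

```c
#include <stdatomic.h>
#include <stdbool.h>

#define QW_WAITING 0x080u       /* hypothetical "a writer is waiting" flag */
#define QW_LOCKED  0x100u       /* hypothetical "a writer holds the lock" flag */

struct toy_qrwlock {
        atomic_uint cnts;
};

/*
 * Writer slow path, loosely modelled on queued_write_lock_slowpath().
 * Publishing the WAITING flag transfers no critical-section data, so a
 * RELAXED compare-and-swap is enough; the ACQUIRE ordering that guards the
 * critical section comes from the final compare-and-swap that turns
 * WAITING into LOCKED.
 */
static void toy_write_lock_slow(struct toy_qrwlock *lock)
{
        unsigned int expected;

        /* Step 1: announce the waiting writer -- no ordering needed yet. */
        for (;;) {
                expected = 0;
                if (atomic_compare_exchange_weak_explicit(&lock->cnts,
                                                          &expected, QW_WAITING,
                                                          memory_order_relaxed,
                                                          memory_order_relaxed))
                        break;
        }

        /* Step 2: take the lock -- this CAS carries the ACQUIRE ordering. */
        for (;;) {
                expected = QW_WAITING;
                if (atomic_compare_exchange_weak_explicit(&lock->cnts,
                                                          &expected, QW_LOCKED,
                                                          memory_order_acquire,
                                                          memory_order_relaxed))
                        break;
        }
}
```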