author	Will Deacon <will.deacon@arm.com>	2013-07-02 09:54:33 -0400
committer	Will Deacon <will.deacon@arm.com>	2013-09-30 11:42:55 -0400
commit	9bb17be062de6f5a9c9643258951aa0935652ec3
tree	cf430be919c709f50752dc856a5384d329abcaee
parent	d8f57aa4bc5860df68d4c332d2a89c131417ee7b
ARM: locks: prefetch the destination word for write prior to strex
The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch prefixes our {spin,read,write}_[try]lock implementations with
pldw instructions (on CPUs which support them) to try and grab the line
in exclusive state from the start. arch_rwlock_t is changed to avoid
using a volatile member, since this generates compiler warnings when
falling back on the __builtin_prefetch intrinsic which expects a
const void * argument.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
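As background (not part of the patch itself; the helper names below are illustrative stand-ins, not the kernel's actual definitions), the sketch shows the two mechanisms the message refers to: on ARMv7 CPUs with the multiprocessing extensions, a write prefetch can be expressed as a pldw hint so the cache line is requested in exclusive (writable) state up front, while the generic fallback in <linux/prefetch.h> expands to __builtin_prefetch(x, 1), whose const void * prototype is what made a volatile lock member trigger qualifier warnings.

/*
 * Illustrative sketch only -- simplified stand-ins, not the kernel's
 * actual prefetchw() definitions.
 */

/* With the ARMv7 multiprocessing extensions, hint that the line at 'p'
 * is about to be written, so it can be fetched in exclusive state. */
static inline void example_prefetchw(const void *p)
{
	__asm__ __volatile__(
		".arch_extension mp\n"
		"	pldw	%a0"
		: : "p" (p));
}

/* Generic fallback: the GCC intrinsic, where the second argument (1)
 * requests a prefetch for write.  Its const void * parameter is why
 * passing the address of a volatile member warned, and why this patch
 * turns arch_rwlock_t::lock into a plain u32. */
#define example_prefetchw_fallback(x)	__builtin_prefetch(x, 1)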
-rw-r--r--	arch/arm/include/asm/spinlock.h		13
-rw-r--r--	arch/arm/include/asm/spinlock_types.h	2
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index e1ce45230913..499900781d59 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -5,7 +5,7 @@
 #error SMP not supported on pre-ARMv6 CPUs
 #endif
 
-#include <asm/processor.h>
+#include <linux/prefetch.h>
 
 /*
  * sev and wfe are ARMv6K extensions.  Uniprocessor ARMv6 may not have the K
@@ -70,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	u32 newval;
 	arch_spinlock_t lockval;
 
+	prefetchw(&lock->slock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%3]\n"
 "	add	%1, %0, %4\n"
@@ -93,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	unsigned long contended, res;
 	u32 slock;
 
+	prefetchw(&lock->slock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%3]\n"
@@ -145,6 +147,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%1]\n"
 "	teq	%0, #0\n"
@@ -163,6 +166,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned long contended, res;
 
+	prefetchw(&rw->lock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%2]\n"
@@ -196,7 +200,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 }
 
 /* write_can_lock - would write_trylock() succeed? */
-#define arch_write_can_lock(x)		((x)->lock == 0)
+#define arch_write_can_lock(x)		(ACCESS_ONCE((x)->lock) == 0)
 
 /*
  * Read locks are a bit more hairy:
@@ -214,6 +218,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, tmp2;
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%2]\n"
 "	adds	%0, %0, #1\n"
@@ -234,6 +239,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 
 	smp_mb();
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%2]\n"
 "	sub	%0, %0, #1\n"
@@ -252,6 +258,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned long contended, res;
 
+	prefetchw(&rw->lock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%2]\n"
@@ -273,7 +280,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 }
 
 /* read_can_lock - would read_trylock() succeed? */
-#define arch_read_can_lock(x)		((x)->lock < 0x80000000)
+#define arch_read_can_lock(x)		(ACCESS_ONCE((x)->lock) < 0x80000000)
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index b262d2f8b478..47663fcb10ad 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -25,7 +25,7 @@ typedef struct {
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
 typedef struct {
-	volatile unsigned int lock;
+	u32 lock;
 } arch_rwlock_t;
 
 #define __ARCH_RW_LOCK_UNLOCKED		{ 0 }