author     Will Deacon <will.deacon@arm.com>               2014-02-21 11:01:48 -0500
committer  Russell King <rmk+kernel@arm.linux.org.uk>      2014-02-25 06:30:20 -0500
commit     c32ffce0f66e5d1d4856254516e24f5ef275cd00 (patch)
tree       125229cdd38bfd6e7e62cff7eb8771a34cc999a7 /arch/arm/include
parent     6ea41c80115f49e7d8b80312ffc99973d283471f (diff)
ARM: 7984/1: prefetch: add prefetchw invocations for barriered atomics
After a bunch of benchmarking on the interaction between dmb and pldw,
it turns out that issuing the pldw *after* the dmb instruction can
give modest performance gains (~3% atomic_add_return improvement on a
dual A15).
This patch adds prefetchw invocations to our barriered atomic operations
including cmpxchg, test_and_xxx and futexes.
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
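For readers unfamiliar with the pattern, the change boils down to issuing a write-hint prefetch of the target cache line immediately after the barrier and before the exclusive load/store retry loop. The sketch below is a minimal user-space illustration of that ordering, not the kernel code: the function name is hypothetical, and portable C11 primitives stand in for smp_mb()/prefetchw()/ldrex-strex (on ARMv7 with the MP extensions, GCC's __builtin_prefetch(p, 1) can emit a pldw).

#include <stdatomic.h>

/*
 * Minimal sketch of the dmb-then-pldw ordering this patch applies.
 * my_atomic_add_return() is a hypothetical stand-in for the kernel's
 * atomic_add_return(); the real version open-codes an ldrex/strex loop.
 */
static inline int my_atomic_add_return(int i, _Atomic int *v)
{
	atomic_thread_fence(memory_order_seq_cst);	/* ~ smp_mb(), i.e. dmb        */
	__builtin_prefetch((void *)v, 1);		/* ~ prefetchw(): write hint   */

	/* the exclusive-access retry loop the prefetch is warming up for */
	int old = atomic_fetch_add_explicit(v, i, memory_order_relaxed);

	atomic_thread_fence(memory_order_seq_cst);	/* trailing smp_mb(), as in the kernel */
	return old + i;
}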
Diffstat (limited to 'arch/arm/include')
-rw-r--r--  arch/arm/include/asm/atomic.h   |  9 +++++++++
-rw-r--r--  arch/arm/include/asm/cmpxchg.h  |  6 ++++++
-rw-r--r--  arch/arm/include/asm/futex.h    |  3 +++
3 files changed, 18 insertions(+), 0 deletions(-)
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 62d2cb53b069..6e410090896e 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -60,6 +60,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	int result;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic_add_return\n"
 "1:	ldrex	%0, [%3]\n"
@@ -99,6 +100,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	int result;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic_sub_return\n"
 "1:	ldrex	%0, [%3]\n"
@@ -121,6 +123,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 	unsigned long res;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	do {
 		__asm__ __volatile__("@ atomic_cmpxchg\n"
@@ -299,6 +302,7 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_add_return\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -340,6 +344,7 @@ static inline long long atomic64_sub_return(long long i, atomic64_t *v)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_sub_return\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -364,6 +369,7 @@ static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old,
 	unsigned long res;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	do {
 		__asm__ __volatile__("@ atomic64_cmpxchg\n"
@@ -388,6 +394,7 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	__asm__ __volatile__("@ atomic64_xchg\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -409,6 +416,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_dec_if_positive\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -436,6 +444,7 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 	int ret = 1;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_add_unless\n"
 "1:	ldrexd	%0, %H0, [%4]\n"
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index df2fbba7efc8..abb2c3769b01 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -2,6 +2,7 @@
 #define __ASM_ARM_CMPXCHG_H
 
 #include <linux/irqflags.h>
+#include <linux/prefetch.h>
 #include <asm/barrier.h>
 
 #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
@@ -35,6 +36,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 #endif
 
 	smp_mb();
+	prefetchw((const void *)ptr);
 
 	switch (size) {
 #if __LINUX_ARM_ARCH__ >= 6
@@ -138,6 +140,8 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 {
 	unsigned long oldval, res;
 
+	prefetchw((const void *)ptr);
+
 	switch (size) {
 #ifndef CONFIG_CPU_V6	/* min ARCH >= ARMv6K */
 	case 1:
@@ -230,6 +234,8 @@ static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
 	unsigned long long oldval;
 	unsigned long res;
 
+	prefetchw(ptr);
+
 	__asm__ __volatile__(
 "1:	ldrexd		%1, %H1, [%3]\n"
 "	teq		%1, %4\n"
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 2aff798fbef4..53e69dae796f 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -23,6 +23,7 @@
 
 #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg)	\
 	smp_mb();							\
+	prefetchw(uaddr);						\
 	__asm__ __volatile__(						\
 	"1:	ldrex	%1, [%3]\n"					\
 	"	" insn "\n"						\
@@ -46,6 +47,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		return -EFAULT;
 
 	smp_mb();
+	/* Prefetching cannot fault */
+	prefetchw(uaddr);
 	__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
 	"1:	ldrex	%1, [%4]\n"
 	"	teq	%1, %2\n"