author	Will Deacon <will.deacon@arm.com>	2014-02-21 11:01:48 -0500
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2014-02-25 06:30:20 -0500
commit	c32ffce0f66e5d1d4856254516e24f5ef275cd00 (patch)
tree	125229cdd38bfd6e7e62cff7eb8771a34cc999a7 /arch/arm/include/asm/atomic.h
parent	6ea41c80115f49e7d8b80312ffc99973d283471f (diff)
ARM: 7984/1: prefetch: add prefetchw invocations for barriered atomics
After a bunch of benchmarking on the interaction between dmb and pldw, it turns out that issuing the pldw *after* the dmb instruction can give modest performance gains (~3% atomic_add_return improvement on a dual A15). This patch adds prefetchw invocations to our barriered atomic operations including cmpxchg, test_and_xxx and futexes.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
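For context, this is roughly how atomic_add_return() reads once the prefetch is in place. The smp_mb()/prefetchw() pairing and the ldrex line are taken from the first hunk below; the rest of the body is reconstructed from the surrounding kernel code of that era, so treat anything outside the hunk as an approximation rather than the exact source:

static inline int atomic_add_return(int i, atomic_t *v)
{
	unsigned long tmp;
	int result;

	smp_mb();			/* full barrier before the update */
	prefetchw(&v->counter);		/* added by this patch: prefetch-for-write after the barrier */

	__asm__ __volatile__("@ atomic_add_return\n"
"1:	ldrex	%0, [%3]\n"		/* load-exclusive the counter */
"	add	%0, %0, %4\n"
"	strex	%1, %0, [%3]\n"		/* store-exclusive; %1 == 0 on success */
"	teq	%1, #0\n"
"	bne	1b"			/* retry if the exclusive store failed */
	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
	: "r" (&v->counter), "Ir" (i)
	: "cc");

	smp_mb();			/* full barrier after the update */

	return result;
}

The same smp_mb(); prefetchw(...); pairing is applied to every barriered LL/SC routine touched by the diff below.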
Diffstat (limited to 'arch/arm/include/asm/atomic.h')
-rw-r--r--	arch/arm/include/asm/atomic.h	9
1 file changed, 9 insertions, 0 deletions
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 62d2cb53b069..6e410090896e 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -60,6 +60,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	int result;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic_add_return\n"
 "1:	ldrex	%0, [%3]\n"
@@ -99,6 +100,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	int result;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic_sub_return\n"
 "1:	ldrex	%0, [%3]\n"
@@ -121,6 +123,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 	unsigned long res;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	do {
 		__asm__ __volatile__("@ atomic_cmpxchg\n"
@@ -299,6 +302,7 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_add_return\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -340,6 +344,7 @@ static inline long long atomic64_sub_return(long long i, atomic64_t *v)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_sub_return\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -364,6 +369,7 @@ static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old,
 	unsigned long res;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	do {
 		__asm__ __volatile__("@ atomic64_cmpxchg\n"
@@ -388,6 +394,7 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	__asm__ __volatile__("@ atomic64_xchg\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -409,6 +416,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_dec_if_positive\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -436,6 +444,7 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 	int ret = 1;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_add_unless\n"
 "1:	ldrexd	%0, %H0, [%4]\n"