author	Will Deacon <will.deacon@arm.com>	2013-07-04 06:43:18 -0400
committer	Will Deacon <will.deacon@arm.com>	2013-09-30 11:42:56 -0400
commit	f38d999c4d16fc0fce4270374f15fbb2d8713c09 (patch)
tree	c91a2a9fd5505a27ee0e8d03141842b07cc4e0c9 /arch/arm/include
parent	9bb17be062de6f5a9c9643258951aa0935652ec3 (diff)
ARM: atomics: prefetch the destination word for write prior to strex
The cost of changing a cacheline from shared to exclusive state can be significant, especially when this is triggered by an exclusive store, since it may result in having to retry the transaction.

This patch prefixes our atomic access implementations with pldw instructions (on CPUs which support them) to try and grab the line in exclusive state from the start. Only the barrier-less functions are updated, since memory barriers can limit the usefulness of prefetching data.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
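As an illustration (not part of the commit), here is a minimal standalone sketch of the pattern the patch applies: prefetch the target word for write before entering the ldrex/strex retry loop. The function name example_atomic_add and the use of GCC's __builtin_prefetch(p, 1) in place of the kernel's prefetchw() helper are assumptions for a userspace build; on ARM cores that implement pldw, the write-prefetch builtin is expected to map to that instruction.

/*
 * Sketch only: mirrors the shape of the kernel's atomic_add() after
 * this patch, using a plain int instead of atomic_t and the GCC
 * builtin instead of prefetchw() (assumed substitutions).
 */
static inline void example_atomic_add(int i, int *counter)
{
	unsigned long tmp;
	int result;

	__builtin_prefetch(counter, 1);	/* write-prefetch: ask for the line in exclusive state */

	__asm__ __volatile__("@ example_atomic_add\n"
"1:	ldrex	%0, [%3]\n"		/* load-exclusive the counter        */
"	add	%0, %0, %4\n"		/* apply the increment               */
"	strex	%1, %0, [%3]\n"		/* store-exclusive the result        */
"	teq	%1, #0\n"		/* retry if exclusivity was lost     */
"	bne	1b"
	: "=&r" (result), "=&r" (tmp), "+Qo" (*counter)
	: "r" (counter), "Ir" (i)
	: "cc");
}

Without the prefetch, the line is typically acquired in shared state by the ldrex and must be upgraded to exclusive at the strex, which is where the retries tend to come from.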
Diffstat (limited to 'arch/arm/include')
-rw-r--r--	arch/arm/include/asm/atomic.h	7
1 file changed, 7 insertions(+), 0 deletions(-)
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index da1c77d39327..55ffc3b850f4 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -12,6 +12,7 @@
 #define __ASM_ARM_ATOMIC_H
 
 #include <linux/compiler.h>
+#include <linux/prefetch.h>
 #include <linux/types.h>
 #include <linux/irqflags.h>
 #include <asm/barrier.h>
@@ -41,6 +42,7 @@ static inline void atomic_add(int i, atomic_t *v)
 	unsigned long tmp;
 	int result;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic_add\n"
 "1:	ldrex	%0, [%3]\n"
 "	add	%0, %0, %4\n"
@@ -79,6 +81,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 	unsigned long tmp;
 	int result;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic_sub\n"
 "1:	ldrex	%0, [%3]\n"
 "	sub	%0, %0, %4\n"
@@ -138,6 +141,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 {
 	unsigned long tmp, tmp2;
 
+	prefetchw(addr);
 	__asm__ __volatile__("@ atomic_clear_mask\n"
 "1:	ldrex	%0, [%3]\n"
 "	bic	%0, %0, %4\n"
@@ -283,6 +287,7 @@ static inline void atomic64_set(atomic64_t *v, u64 i)
 {
 	u64 tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_set\n"
 "1:	ldrexd	%0, %H0, [%2]\n"
 "	strexd	%0, %3, %H3, [%2]\n"
@@ -299,6 +304,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
 	u64 result;
 	unsigned long tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_add\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
 "	adds	%0, %0, %4\n"
@@ -339,6 +345,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
 	u64 result;
 	unsigned long tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_sub\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
 "	subs	%0, %0, %4\n"