author		Will Deacon <will.deacon@arm.com>		2014-02-21 11:01:48 -0500
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2014-02-25 06:35:08 -0500
commit		db38ee874c48713d0723221d08332242e0088970
tree		065fee530d0374306564ea37606617f007af6c64
parent		d98b90ea22b0a28d9d787769704a9cf1ea5a513a
ARM: 7983/1: atomics: implement a better __atomic_add_unless for v6+
Looking at perf profiles of multi-threaded hackbench runs, a significant
performance hit appears to come from the cmpxchg loop used to implement
the 32-bit atomic_add_unless() function. This can be mitigated by writing
a direct implementation of __atomic_add_unless() which doesn't require
iteration outside of the ll/sc loop itself.
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
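For context, the implementation being replaced on ARMv6+ (and still kept further down in the header for pre-v6 parts) is a compare-and-swap retry loop. The following is a sketch of that generic pattern, assuming only the atomic_read() and atomic_cmpxchg() helpers already provided by this header; it is illustrative rather than a verbatim copy of the old source:

	/*
	 * cmpxchg-based retry loop this patch avoids on v6+. Every failed
	 * atomic_cmpxchg() bounces back out to the C loop, which re-reads
	 * the counter and retries the whole compare-and-swap.
	 */
	static inline int __atomic_add_unless(atomic_t *v, int a, int u)
	{
		int c, old;

		c = atomic_read(v);
		while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c)
			c = old;

		return c;	/* old value; callers compare it against u */
	}

The ldrex/strex version added below keeps the retry inside the load-exclusive/store-exclusive sequence itself, so a contended update no longer pays for a separate read-modify-cmpxchg round trip on every attempt.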
-rw-r--r--	arch/arm/include/asm/atomic.h	35
1 file changed, 31 insertions(+), 4 deletions(-)
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 6e410090896e..9a92fd7864a8 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -141,6 +141,33 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 	return oldval;
 }
 
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int oldval, newval;
+	unsigned long tmp;
+
+	smp_mb();
+	prefetchw(&v->counter);
+
+	__asm__ __volatile__ ("@ atomic_add_unless\n"
+"1:	ldrex	%0, [%4]\n"
+"	teq	%0, %5\n"
+"	beq	2f\n"
+"	add	%1, %0, %6\n"
+"	strex	%2, %1, [%4]\n"
+"	teq	%2, #0\n"
+"	bne	1b\n"
+"2:"
+	: "=&r" (oldval), "=&r" (newval), "=&r" (tmp), "+Qo" (v->counter)
+	: "r" (&v->counter), "r" (u), "r" (a)
+	: "cc");
+
+	if (oldval != u)
+		smp_mb();
+
+	return oldval;
+}
+
 #else /* ARM_ARCH_6 */
 
 #ifdef CONFIG_SMP
@@ -189,10 +216,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-#endif /* __LINUX_ARM_ARCH__ */
-
-#define atomic_xchg(v, new)	(xchg(&((v)->counter), new))
-
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
@@ -203,6 +226,10 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
+#endif /* __LINUX_ARM_ARCH__ */
+
+#define atomic_xchg(v, new)	(xchg(&((v)->counter), new))
+
 #define atomic_inc(v)		atomic_add(1, v)
 #define atomic_dec(v)		atomic_sub(1, v)
 
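As a usage note rather than part of this patch: callers normally reach __atomic_add_unless() through the generic wrappers in include/linux/atomic.h, roughly as sketched below; the wrapper shapes are from the headers of that era, and the refcount caller is purely hypothetical:

	/* __atomic_add_unless() returns the old value; the wrapper turns
	 * that into a "did we actually add?" answer. */
	static inline int atomic_add_unless(atomic_t *v, int a, int u)
	{
		return __atomic_add_unless(v, a, u) != u;
	}
	#define atomic_inc_not_zero(v)	atomic_add_unless((v), 1, 0)

	/* Hypothetical caller: only take a reference if the object is alive. */
	if (!atomic_inc_not_zero(&obj->refcount))
		return NULL;

Note also, in the hunk above, that the trailing smp_mb() is only issued when oldval != u, i.e. when a store actually took place; the early-exit path branches past the strex and skips the second barrier.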