author	Will Deacon <will.deacon@arm.com>	2014-02-21 11:01:48 -0500
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2014-02-25 06:35:08 -0500
commit	db38ee874c48713d0723221d08332242e0088970 (patch)
tree	065fee530d0374306564ea37606617f007af6c64
parent	d98b90ea22b0a28d9d787769704a9cf1ea5a513a (diff)
ARM: 7983/1: atomics: implement a better __atomic_add_unless for v6+
Looking at perf profiles of multi-threaded hackbench runs, a significant
performance hit appears to manifest from the cmpxchg loop used to implement
the 32-bit atomic_add_unless function.

This can be mitigated by writing a direct implementation of
__atomic_add_unless which doesn't require iteration outside of the
atomic operation.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r--	arch/arm/include/asm/atomic.h	35
1 file changed, 31 insertions, 4 deletions
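For context: the cmpxchg-based version, which this patch confines to pre-ARMv6 builds (the second and third hunks below move it inside the #else branch), retries in a C-level loop, so every lost race costs a further read/compare/exchange round trip. The hunk context below elides the loop body; reconstructed from the kernel's generic pattern, it looks roughly like this sketch:

static inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
	int c, old;

	c = atomic_read(v);
	/* Retry whenever another CPU changed v between the read and the
	 * cmpxchg; each retry re-runs the full compare/exchange cycle. */
	while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c)
		c = old;
	return c;
}

The new v6+ version keeps the retry entirely inside the ldrex/strex pair: a failed strex branches straight back to the ldrex without leaving the assembly block.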
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 6e410090896e..9a92fd7864a8 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -141,6 +141,33 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 	return oldval;
 }
 
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int oldval, newval;
+	unsigned long tmp;
+
+	smp_mb();
+	prefetchw(&v->counter);
+
+	__asm__ __volatile__ ("@ atomic_add_unless\n"
+"1:	ldrex	%0, [%4]\n"
+"	teq	%0, %5\n"
+"	beq	2f\n"
+"	add	%1, %0, %6\n"
+"	strex	%2, %1, [%4]\n"
+"	teq	%2, #0\n"
+"	bne	1b\n"
+"2:"
+	: "=&r" (oldval), "=&r" (newval), "=&r" (tmp), "+Qo" (v->counter)
+	: "r" (&v->counter), "r" (u), "r" (a)
+	: "cc");
+
+	if (oldval != u)
+		smp_mb();
+
+	return oldval;
+}
+
 #else /* ARM_ARCH_6 */
 
 #ifdef CONFIG_SMP
@@ -189,10 +216,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-#endif /* __LINUX_ARM_ARCH__ */
-
-#define atomic_xchg(v, new)	(xchg(&((v)->counter), new))
-
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
@@ -203,6 +226,10 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
+#endif /* __LINUX_ARM_ARCH__ */
+
+#define atomic_xchg(v, new)	(xchg(&((v)->counter), new))
+
 #define atomic_inc(v)		atomic_add(1, v)
 #define atomic_dec(v)		atomic_sub(1, v)
 
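Note the barrier placement in the new implementation: the smp_mb() before the ldrex orders the operation against earlier accesses, while the trailing barrier is skipped when oldval == u, i.e. when no store was performed and no ordering is required. Callers normally reach this through the generic wrappers in include/linux/atomic.h, which at the time looked roughly like the following sketch (see that header for the authoritative definitions):

/* Returns non-zero iff the add was actually performed. */
static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
	return __atomic_add_unless(v, a, u) != u;
}

#define atomic_inc_not_zero(v)	atomic_add_unless((v), 1, 0)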