author		Will Deacon <will.deacon@arm.com>	2012-07-13 14:15:40 -0400
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2012-07-31 05:30:41 -0400
commit		a76d7bd96d65fa5119adba97e1b58d95f2e78829 (patch)
tree		9e179b07cce5b6d79018a0fbb9f18bc570f9c449 /arch/arm/include
parent		98bd8b96b26db3399a48202318dca4aaa2515355 (diff)
ARM: 7467/1: mutex: use generic xchg-based implementation for ARMv6+
The open-coded mutex implementation for ARMv6+ cores suffers from a severe lack of barriers, so in the uncontended case we don't actually protect any accesses performed during the critical section.

Furthermore, the code is largely a duplication of the ARMv6+ atomic_dec code but optimised to remove a branch instruction, as the mutex fastpath was previously inlined. Now that this is executed out-of-line, we can reuse the atomic access code for the locking (in fact, we use the xchg code as this produces shorter critical sections).

This patch uses the generic xchg based implementation for mutexes on ARMv6+, which introduces barriers to the lock/unlock operations and also has the benefit of removing a fair amount of inline assembly code.

Cc: <stable@vger.kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Nicolas Pitre <nico@linaro.org>
Reported-by: Shan Kang <kangshan0910@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
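For reference, the generic xchg-based fastpaths this patch switches to look roughly like the sketch below (reconstructed from the asm-generic/mutex-xchg.h of that era; simplified, not the verbatim file). The mutex count is 1 when unlocked, 0 when locked, and negative when locked with waiters; atomic_xchg() on ARMv6+ carries the full memory barriers the open-coded fastpath never issued.

/* Sketch of the generic xchg-based mutex fastpaths (simplified). */
static inline void
__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
	/* Swap in "locked"; if the old value was not 1 (unlocked), contend. */
	if (unlikely(atomic_xchg(count, 0) != 1))
		fail_fn(count);		/* slow path: sleep/spin until acquired */
}

static inline void
__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
	/* Swap in "unlocked"; a non-zero old value means waiters may exist. */
	if (unlikely(atomic_xchg(count, 1) != 0))
		fail_fn(count);		/* slow path: wake up any waiters */
}

Because these build on the same atomic primitives the architecture already provides, the per-architecture inline assembly below can be removed entirely.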
Diffstat (limited to 'arch/arm/include')
-rw-r--r--	arch/arm/include/asm/mutex.h	| 119
1 file changed, 4 insertions(+), 115 deletions(-)
diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h
index 93226cf23ae0..b1479fd04a95 100644
--- a/arch/arm/include/asm/mutex.h
+++ b/arch/arm/include/asm/mutex.h
@@ -7,121 +7,10 @@
  */
 #ifndef _ASM_MUTEX_H
 #define _ASM_MUTEX_H
-
-#if __LINUX_ARM_ARCH__ < 6
-/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
-# include <asm-generic/mutex-xchg.h>
-#else
-
 /*
- * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
- * atomic decrement (it is not a reliable atomic decrement but it satisfies
- * the defined semantics for our purpose, while being smaller and faster
- * than a real atomic decrement or atomic swap. The idea is to attempt
- * decrementing the lock value only once. If once decremented it isn't zero,
- * or if its store-back fails due to a dispute on the exclusive store, we
- * simply bail out immediately through the slow path where the lock will be
- * reattempted until it succeeds.
+ * On pre-ARMv6 hardware this results in a swp-based implementation,
+ * which is the most efficient. For ARMv6+, we emit a pair of exclusive
+ * accesses instead.
  */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res;
-
-	__asm__ (
-
-		"ldrex	%0, [%2]	\n\t"
-		"sub	%0, %0, #1	\n\t"
-		"strex	%1, %0, [%2]	"
-
-		: "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__res |= __ex_flag;
-	if (unlikely(__res != 0))
-		fail_fn(count);
-}
-
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res;
-
-	__asm__ (
-
-		"ldrex	%0, [%2]	\n\t"
-		"sub	%0, %0, #1	\n\t"
-		"strex	%1, %0, [%2]	"
-
-		: "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__res |= __ex_flag;
-	if (unlikely(__res != 0))
-		__res = fail_fn(count);
-	return __res;
-}
-
-/*
- * Same trick is used for the unlock fast path. However the original value,
- * rather than the result, is used to test for success in order to have
- * better generated assembly.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res, __orig;
-
-	__asm__ (
-
-		"ldrex	%0, [%3]	\n\t"
-		"add	%1, %0, #1	\n\t"
-		"strex	%2, %1, [%3]	"
-
-		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__orig |= __ex_flag;
-	if (unlikely(__orig != 0))
-		fail_fn(count);
-}
-
-/*
- * If the unlock was done on a contended lock, or if the unlock simply fails
- * then the mutex remains locked.
- */
-#define __mutex_slowpath_needs_to_unlock()	1
-
-/*
- * For __mutex_fastpath_trylock we use another construct which could be
- * described as a "single value cmpxchg".
- *
- * This provides the needed trylock semantics like cmpxchg would, but it is
- * lighter and less generic than a true cmpxchg implementation.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res, __orig;
-
-	__asm__ (
-
-		"1: ldrex	%0, [%3]	\n\t"
-		"subs	%1, %0, #1	\n\t"
-		"strexeq	%2, %1, [%3]	\n\t"
-		"movlt	%0, #0	\n\t"
-		"cmpeq	%2, #0	\n\t"
-		"bgt	1b	"
-
-		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&count->counter)
-		: "cc", "memory" );
-
-	return __orig;
-}
-
-#endif
+#include <asm-generic/mutex-xchg.h>
 #endif
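For context on why the xchg path brings the missing barriers: on ARMv6+ the kernel's xchg() of that era was built from an ldrex/strex loop bracketed by smp_mb(), roughly as sketched below. This is a simplified reconstruction of the 4-byte case from arch/arm/include/asm/cmpxchg.h; the helper name here is illustrative (the real function is __xchg(), dispatched by operand size).

/* Sketch: ARMv6+ 4-byte xchg with full barriers (illustrative name). */
static inline unsigned long xchg32_sketch(unsigned long x, volatile void *ptr)
{
	unsigned long ret, tmp;

	smp_mb();				/* order prior accesses before the swap */
	asm volatile(
	"1:	ldrex	%0, [%3]\n"		/* load-exclusive the old value */
	"	strex	%1, %2, [%3]\n"		/* try to store the new value */
	"	teq	%1, #0\n"		/* did the exclusive store succeed? */
	"	bne	1b"			/* no: another CPU intervened, retry */
		: "=&r" (ret), "=&r" (tmp)
		: "r" (x), "r" (ptr)
		: "memory", "cc");
	smp_mb();				/* order the swap before later accesses */

	return ret;
}

The two smp_mb() calls are exactly what the removed open-coded fastpath never issued, which is the "severe lack of barriers" the commit message refers to.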