diff options
author | Russell King <rmk@dyn-67.arm.linux.org.uk> | 2009-05-25 15:58:00 -0400 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2009-05-28 14:39:27 -0400 |
commit | bac4e960b5ce2453d862beaf20e59aa68af3b43a (patch) | |
tree | 69ba3b450a769fa4a613a1f8c4e6454cdcfae5aa | |
parent | 290815710b51de23f9ed6799d3e0bb762d4f907c (diff) |
[ARM] barriers: improve xchg, bitops and atomic SMP barriers
Mathieu Desnoyers pointed out that the ARM barriers were lacking:
- cmpxchg, xchg and atomic add return need memory barriers on
architectures which can reorder the relative order in which memory
read/writes can be seen between CPUs, which seems to include recent
ARM architectures. Those barriers are currently missing on ARM.
- test_and_xxx_bit were missing SMP barriers.
So put these barriers in. Provide separate atomic_add/atomic_sub
operations which do not require barriers.
Reported-Reviewed-and-Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r-- | arch/arm/include/asm/assembler.h | 13 | ||||
-rw-r--r-- | arch/arm/include/asm/atomic.h | 61 | ||||
-rw-r--r-- | arch/arm/include/asm/system.h | 3 | ||||
-rw-r--r-- | arch/arm/kernel/entry-armv.S | 5 | ||||
-rw-r--r-- | arch/arm/lib/bitops.h | 2 |
5 files changed, 71 insertions, 13 deletions
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 6116e4893c0a..15f8a092b700 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h | |||
@@ -114,3 +114,16 @@ | |||
114 | .align 3; \ | 114 | .align 3; \ |
115 | .long 9999b,9001f; \ | 115 | .long 9999b,9001f; \ |
116 | .previous | 116 | .previous |
117 | |||
118 | /* | ||
119 | * SMP data memory barrier | ||
120 | */ | ||
121 | .macro smp_dmb | ||
122 | #ifdef CONFIG_SMP | ||
123 | #if __LINUX_ARM_ARCH__ >= 7 | ||
124 | dmb | ||
125 | #elif __LINUX_ARM_ARCH__ == 6 | ||
126 | mcr p15, 0, r0, c7, c10, 5 @ dmb | ||
127 | #endif | ||
128 | #endif | ||
129 | .endm | ||
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index ee99723b3a6c..16b52f397983 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h | |||
@@ -44,11 +44,29 @@ static inline void atomic_set(atomic_t *v, int i) | |||
44 | : "cc"); | 44 | : "cc"); |
45 | } | 45 | } |
46 | 46 | ||
47 | static inline void atomic_add(int i, atomic_t *v) | ||
48 | { | ||
49 | unsigned long tmp; | ||
50 | int result; | ||
51 | |||
52 | __asm__ __volatile__("@ atomic_add\n" | ||
53 | "1: ldrex %0, [%2]\n" | ||
54 | " add %0, %0, %3\n" | ||
55 | " strex %1, %0, [%2]\n" | ||
56 | " teq %1, #0\n" | ||
57 | " bne 1b" | ||
58 | : "=&r" (result), "=&r" (tmp) | ||
59 | : "r" (&v->counter), "Ir" (i) | ||
60 | : "cc"); | ||
61 | } | ||
62 | |||
47 | static inline int atomic_add_return(int i, atomic_t *v) | 63 | static inline int atomic_add_return(int i, atomic_t *v) |
48 | { | 64 | { |
49 | unsigned long tmp; | 65 | unsigned long tmp; |
50 | int result; | 66 | int result; |
51 | 67 | ||
68 | smp_mb(); | ||
69 | |||
52 | __asm__ __volatile__("@ atomic_add_return\n" | 70 | __asm__ __volatile__("@ atomic_add_return\n" |
53 | "1: ldrex %0, [%2]\n" | 71 | "1: ldrex %0, [%2]\n" |
54 | " add %0, %0, %3\n" | 72 | " add %0, %0, %3\n" |
@@ -59,14 +77,34 @@ static inline int atomic_add_return(int i, atomic_t *v) | |||
59 | : "r" (&v->counter), "Ir" (i) | 77 | : "r" (&v->counter), "Ir" (i) |
60 | : "cc"); | 78 | : "cc"); |
61 | 79 | ||
80 | smp_mb(); | ||
81 | |||
62 | return result; | 82 | return result; |
63 | } | 83 | } |
64 | 84 | ||
85 | static inline void atomic_sub(int i, atomic_t *v) | ||
86 | { | ||
87 | unsigned long tmp; | ||
88 | int result; | ||
89 | |||
90 | __asm__ __volatile__("@ atomic_sub\n" | ||
91 | "1: ldrex %0, [%2]\n" | ||
92 | " sub %0, %0, %3\n" | ||
93 | " strex %1, %0, [%2]\n" | ||
94 | " teq %1, #0\n" | ||
95 | " bne 1b" | ||
96 | : "=&r" (result), "=&r" (tmp) | ||
97 | : "r" (&v->counter), "Ir" (i) | ||
98 | : "cc"); | ||
99 | } | ||
100 | |||
65 | static inline int atomic_sub_return(int i, atomic_t *v) | 101 | static inline int atomic_sub_return(int i, atomic_t *v) |
66 | { | 102 | { |
67 | unsigned long tmp; | 103 | unsigned long tmp; |
68 | int result; | 104 | int result; |
69 | 105 | ||
106 | smp_mb(); | ||
107 | |||
70 | __asm__ __volatile__("@ atomic_sub_return\n" | 108 | __asm__ __volatile__("@ atomic_sub_return\n" |
71 | "1: ldrex %0, [%2]\n" | 109 | "1: ldrex %0, [%2]\n" |
72 | " sub %0, %0, %3\n" | 110 | " sub %0, %0, %3\n" |
@@ -77,6 +115,8 @@ static inline int atomic_sub_return(int i, atomic_t *v) | |||
77 | : "r" (&v->counter), "Ir" (i) | 115 | : "r" (&v->counter), "Ir" (i) |
78 | : "cc"); | 116 | : "cc"); |
79 | 117 | ||
118 | smp_mb(); | ||
119 | |||
80 | return result; | 120 | return result; |
81 | } | 121 | } |
82 | 122 | ||
@@ -84,6 +124,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) | |||
84 | { | 124 | { |
85 | unsigned long oldval, res; | 125 | unsigned long oldval, res; |
86 | 126 | ||
127 | smp_mb(); | ||
128 | |||
87 | do { | 129 | do { |
88 | __asm__ __volatile__("@ atomic_cmpxchg\n" | 130 | __asm__ __volatile__("@ atomic_cmpxchg\n" |
89 | "ldrex %1, [%2]\n" | 131 | "ldrex %1, [%2]\n" |
@@ -95,6 +137,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) | |||
95 | : "cc"); | 137 | : "cc"); |
96 | } while (res); | 138 | } while (res); |
97 | 139 | ||
140 | smp_mb(); | ||
141 | |||
98 | return oldval; | 142 | return oldval; |
99 | } | 143 | } |
100 | 144 | ||
@@ -135,6 +179,7 @@ static inline int atomic_add_return(int i, atomic_t *v) | |||
135 | 179 | ||
136 | return val; | 180 | return val; |
137 | } | 181 | } |
182 | #define atomic_add(i, v) (void) atomic_add_return(i, v) | ||
138 | 183 | ||
139 | static inline int atomic_sub_return(int i, atomic_t *v) | 184 | static inline int atomic_sub_return(int i, atomic_t *v) |
140 | { | 185 | { |
@@ -148,6 +193,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) | |||
148 | 193 | ||
149 | return val; | 194 | return val; |
150 | } | 195 | } |
196 | #define atomic_sub(i, v) (void) atomic_sub_return(i, v) | ||
151 | 197 | ||
152 | static inline int atomic_cmpxchg(atomic_t *v, int old, int new) | 198 | static inline int atomic_cmpxchg(atomic_t *v, int old, int new) |
153 | { | 199 | { |
@@ -187,10 +233,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
187 | } | 233 | } |
188 | #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) | 234 | #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) |
189 | 235 | ||
190 | #define atomic_add(i, v) (void) atomic_add_return(i, v) | 236 | #define atomic_inc(v) atomic_add(1, v) |
191 | #define atomic_inc(v) (void) atomic_add_return(1, v) | 237 | #define atomic_dec(v) atomic_sub(1, v) |
192 | #define atomic_sub(i, v) (void) atomic_sub_return(i, v) | ||
193 | #define atomic_dec(v) (void) atomic_sub_return(1, v) | ||
194 | 238 | ||
195 | #define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) | 239 | #define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) |
196 | #define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) | 240 | #define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) |
@@ -200,11 +244,10 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
200 | 244 | ||
201 | #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) | 245 | #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) |
202 | 246 | ||
203 | /* Atomic operations are already serializing on ARM */ | 247 | #define smp_mb__before_atomic_dec() smp_mb() |
204 | #define smp_mb__before_atomic_dec() barrier() | 248 | #define smp_mb__after_atomic_dec() smp_mb() |
205 | #define smp_mb__after_atomic_dec() barrier() | 249 | #define smp_mb__before_atomic_inc() smp_mb() |
206 | #define smp_mb__before_atomic_inc() barrier() | 250 | #define smp_mb__after_atomic_inc() smp_mb() |
207 | #define smp_mb__after_atomic_inc() barrier() | ||
208 | 251 | ||
209 | #include <asm-generic/atomic.h> | 252 | #include <asm-generic/atomic.h> |
210 | #endif | 253 | #endif |
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index bd4dc8ed53d5..7fce8f3b391d 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h | |||
@@ -248,6 +248,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size | |||
248 | unsigned int tmp; | 248 | unsigned int tmp; |
249 | #endif | 249 | #endif |
250 | 250 | ||
251 | smp_mb(); | ||
252 | |||
251 | switch (size) { | 253 | switch (size) { |
252 | #if __LINUX_ARM_ARCH__ >= 6 | 254 | #if __LINUX_ARM_ARCH__ >= 6 |
253 | case 1: | 255 | case 1: |
@@ -307,6 +309,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size | |||
307 | __bad_xchg(ptr, size), ret = 0; | 309 | __bad_xchg(ptr, size), ret = 0; |
308 | break; | 310 | break; |
309 | } | 311 | } |
312 | smp_mb(); | ||
310 | 313 | ||
311 | return ret; | 314 | return ret; |
312 | } | 315 | } |
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index d662a2f1fd85..83b1da6b7baa 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S | |||
@@ -815,10 +815,7 @@ __kuser_helper_start: | |||
815 | */ | 815 | */ |
816 | 816 | ||
817 | __kuser_memory_barrier: @ 0xffff0fa0 | 817 | __kuser_memory_barrier: @ 0xffff0fa0 |
818 | 818 | smp_dmb | |
819 | #if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_SMP) | ||
820 | mcr p15, 0, r0, c7, c10, 5 @ dmb | ||
821 | #endif | ||
822 | usr_ret lr | 819 | usr_ret lr |
823 | 820 | ||
824 | .align 5 | 821 | .align 5 |
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index 2e787d40d599..c7f2627385e7 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h | |||
@@ -18,12 +18,14 @@ | |||
18 | mov r2, #1 | 18 | mov r2, #1 |
19 | add r1, r1, r0, lsr #3 @ Get byte offset | 19 | add r1, r1, r0, lsr #3 @ Get byte offset |
20 | mov r3, r2, lsl r3 @ create mask | 20 | mov r3, r2, lsl r3 @ create mask |
21 | smp_dmb | ||
21 | 1: ldrexb r2, [r1] | 22 | 1: ldrexb r2, [r1] |
22 | ands r0, r2, r3 @ save old value of bit | 23 | ands r0, r2, r3 @ save old value of bit |
23 | \instr r2, r2, r3 @ toggle bit | 24 | \instr r2, r2, r3 @ toggle bit |
24 | strexb ip, r2, [r1] | 25 | strexb ip, r2, [r1] |
25 | cmp ip, #0 | 26 | cmp ip, #0 |
26 | bne 1b | 27 | bne 1b |
28 | smp_dmb | ||
27 | cmp r0, #0 | 29 | cmp r0, #0 |
28 | movne r0, #1 | 30 | movne r0, #1 |
29 | 2: mov pc, lr | 31 | 2: mov pc, lr |