author    Vineet Gupta <vgupta@synopsys.com>    2014-11-20 05:12:09 -0500
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>    2015-07-21 13:10:02 -0400
commit    eb1eecd100ce48d4f8368a0c475ecb937abd40ec (patch)
tree      439f91910db9156bf3ffed7015231f608fab11c7 /arch
parent    f3ff4345ef597115869a227dbf738dde157f8521 (diff)
ARC: add smp barriers around atomics per Documentation/atomic_ops.txt
commit 2576c28e3f623ed401db7e6197241865328620ef upstream.

- arch_spin_lock/unlock were lacking the ACQUIRE/RELEASE barriers.
  Since ARCv2 only provides load/load, store/store and all/all barriers,
  we need the full barrier.

- LLOCK/SCOND based atomics, bitops and cmpxchg, which return modified
  values, were lacking the explicit smp barriers.

- Non LLOCK/SCOND variants don't need the explicit barriers since that
  is implicitly provided by the spin locks used to implement the
  critical section (the spin lock barriers in turn are also fixed in
  this commit, as explained above).

Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
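For context on the pattern the patch applies: a value-returning atomic RMW is documented to act as a full memory barrier on both sides, which is why an explicit smp_mb() is placed before and after each LLOCK/SCOND loop below. The following user-space C11 sketch is illustrative only (it is not the kernel code; the name my_atomic_add_return is invented), with atomic_thread_fence() standing in for smp_mb():

/* Illustrative only -- not the kernel implementation. */
#include <stdatomic.h>
#include <stdio.h>

static int my_atomic_add_return(atomic_int *v, int i)
{
        int old, new;

        /* like the smp_mb() the patch adds before the LLOCK/SCOND loop */
        atomic_thread_fence(memory_order_seq_cst);

        do {
                old = atomic_load_explicit(v, memory_order_relaxed);
                new = old + i;
        } while (!atomic_compare_exchange_weak_explicit(v, &old, new,
                                                        memory_order_relaxed,
                                                        memory_order_relaxed));

        /* like the smp_mb() the patch adds after the loop */
        atomic_thread_fence(memory_order_seq_cst);

        return new;
}

int main(void)
{
        atomic_int v = 0;

        printf("%d\n", my_atomic_add_return(&v, 5));    /* prints 5 */
        return 0;
}

With both fences in place, accesses before the call cannot be reordered past it and accesses after it cannot be reordered before it, matching what Documentation/atomic_ops.txt requires of value-returning atomics.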
Diffstat (limited to 'arch')
-rw-r--r--    arch/arc/include/asm/atomic.h      21
-rw-r--r--    arch/arc/include/asm/bitops.h      19
-rw-r--r--    arch/arc/include/asm/cmpxchg.h     17
-rw-r--r--    arch/arc/include/asm/spinlock.h    32
4 files changed, 89 insertions, 0 deletions
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 9917a45fc430..20b7dc17979e 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -43,6 +43,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
 { \
         unsigned int temp; \
         \
+        /* \
+         * Explicit full memory barrier needed before/after as \
+         * LLOCK/SCOND themselves don't provide any such semantics \
+         */ \
+        smp_mb(); \
+        \
         __asm__ __volatile__( \
         "1: llock %0, [%1] \n" \
         " " #asm_op " %0, %0, %2 \n" \
@@ -52,6 +58,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
52 : "r"(&v->counter), "ir"(i) \ 58 : "r"(&v->counter), "ir"(i) \
53 : "cc"); \ 59 : "cc"); \
54 \ 60 \
61 smp_mb(); \
62 \
55 return temp; \ 63 return temp; \
56} 64}
57 65
@@ -105,6 +113,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
         unsigned long flags; \
         unsigned long temp; \
         \
+        /* \
+         * spin lock/unlock provides the needed smp_mb() before/after \
+         */ \
         atomic_ops_lock(flags); \
         temp = v->counter; \
         temp c_op i; \
@@ -142,9 +153,19 @@ ATOMIC_OP(and, &=, and)
 #define __atomic_add_unless(v, a, u) \
 ({ \
         int c, old; \
+        \
+        /* \
+         * Explicit full memory barrier needed before/after as \
+         * LLOCK/SCOND themselves don't provide any such semantics \
+         */ \
+        smp_mb(); \
+        \
         c = atomic_read(v); \
         while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\
                 c = old; \
+        \
+        smp_mb(); \
+        \
         c; \
 })
 
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 4051e9525939..624a9d048ca9 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -117,6 +117,12 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
         if (__builtin_constant_p(nr))
                 nr &= 0x1f;
 
+        /*
+         * Explicit full memory barrier needed before/after as
+         * LLOCK/SCOND themselves don't provide any such semantics
+         */
+        smp_mb();
+
         __asm__ __volatile__(
         "1: llock %0, [%2] \n"
         " bset %1, %0, %3 \n"
@@ -126,6 +132,8 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
126 : "r"(m), "ir"(nr) 132 : "r"(m), "ir"(nr)
127 : "cc"); 133 : "cc");
128 134
135 smp_mb();
136
129 return (old & (1 << nr)) != 0; 137 return (old & (1 << nr)) != 0;
130} 138}
131 139
@@ -139,6 +147,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
         if (__builtin_constant_p(nr))
                 nr &= 0x1f;
 
+        smp_mb();
+
         __asm__ __volatile__(
         "1: llock %0, [%2] \n"
         " bclr %1, %0, %3 \n"
@@ -148,6 +158,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
         : "r"(m), "ir"(nr)
         : "cc");
 
+        smp_mb();
+
         return (old & (1 << nr)) != 0;
 }
 
@@ -161,6 +173,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m)
         if (__builtin_constant_p(nr))
                 nr &= 0x1f;
 
+        smp_mb();
+
         __asm__ __volatile__(
         "1: llock %0, [%2] \n"
         " bxor %1, %0, %3 \n"
@@ -170,6 +184,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m)
         : "r"(m), "ir"(nr)
         : "cc");
 
+        smp_mb();
+
         return (old & (1 << nr)) != 0;
 }
 
@@ -249,6 +265,9 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
         if (__builtin_constant_p(nr))
                 nr &= 0x1f;
 
+        /*
+         * spin lock/unlock provide the needed smp_mb() before/after
+         */
         bitops_lock(flags);
 
         old = *m;
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index 03cd6894855d..c9b1f461a587 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -10,6 +10,8 @@
 #define __ASM_ARC_CMPXCHG_H
 
 #include <linux/types.h>
+
+#include <asm/barrier.h>
 #include <asm/smp.h>
 
 #ifdef CONFIG_ARC_HAS_LLSC
@@ -19,6 +21,12 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 {
         unsigned long prev;
 
+        /*
+         * Explicit full memory barrier needed before/after as
+         * LLOCK/SCOND themselves don't provide any such semantics
+         */
+        smp_mb();
+
         __asm__ __volatile__(
         "1: llock %0, [%1] \n"
         " brne %0, %2, 2f \n"
@@ -30,6 +38,8 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
30 "r"(new) /* can't be "ir". scond can't take limm for "b" */ 38 "r"(new) /* can't be "ir". scond can't take limm for "b" */
31 : "cc"); 39 : "cc");
32 40
41 smp_mb();
42
33 return prev; 43 return prev;
34} 44}
35 45
@@ -42,6 +52,9 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
         int prev;
         volatile unsigned long *p = ptr;
 
+        /*
+         * spin lock/unlock provide the needed smp_mb() before/after
+         */
         atomic_ops_lock(flags);
         prev = *p;
         if (prev == expected)
@@ -77,12 +90,16 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 
         switch (size) {
         case 4:
+                smp_mb();
+
                 __asm__ __volatile__(
                 " ex %0, [%1] \n"
                 : "+r"(val)
                 : "r"(ptr)
                 : "memory");
 
+                smp_mb();
+
                 return val;
         }
         return __xchg_bad_pointer();
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index b6a8c2dfbe6e..e1651df6a93d 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
         unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
 
+        /*
+         * This smp_mb() is technically superfluous, we only need the one
+         * after the lock for providing the ACQUIRE semantics.
+         * However doing the "right" thing was regressing hackbench
+         * so keeping this, pending further investigation
+         */
+        smp_mb();
+
         __asm__ __volatile__(
         "1: ex %0, [%1] \n"
         " breq %0, %2, 1b \n"
         : "+&r" (tmp)
         : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
         : "memory");
+
+        /*
+         * ACQUIRE barrier to ensure load/store after taking the lock
+         * don't "bleed-up" out of the critical section (leak-in is allowed)
+         * http://www.spinics.net/lists/kernel/msg2010409.html
+         *
+         * ARCv2 only has load-load, store-store and all-all barrier
+         * thus need the full all-all barrier
+         */
+        smp_mb();
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
         unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
 
+        smp_mb();
+
         __asm__ __volatile__(
         "1: ex %0, [%1] \n"
         : "+r" (tmp)
         : "r"(&(lock->slock))
         : "memory");
 
+        smp_mb();
+
         return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
 }
 
@@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
         unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
 
+        /*
+         * RELEASE barrier: given the instructions avail on ARCv2, full barrier
+         * is the only option
+         */
+        smp_mb();
+
         __asm__ __volatile__(
         " ex %0, [%1] \n"
         : "+r" (tmp)
         : "r"(&(lock->slock))
         : "memory");
 
+        /*
+         * superfluous, but keeping for now - see pairing version in
+         * arch_spin_lock above
+         */
         smp_mb();
 }
 
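The spinlock.h hunk above encodes the same idea for locks: the smp_mb() after the EX-based spin loop supplies the ACQUIRE ordering, and the smp_mb() before the releasing store supplies RELEASE, because ARCv2 offers no finer-grained barrier than all-all. A minimal user-space analogue follows; it is a sketch only, with invented names (sketch_spinlock, sketch_spin_lock, sketch_spin_unlock) rather than the kernel API, and C11 seq_cst fences standing in for smp_mb():

/* Illustrative only -- not the kernel's arch_spin_lock implementation. */
#include <stdatomic.h>

struct sketch_spinlock {
        atomic_int slock;               /* 0 = unlocked, 1 = locked */
};

static void sketch_spin_lock(struct sketch_spinlock *lock)
{
        /* spin on a relaxed exchange, roughly like ARC's EX-based loop */
        while (atomic_exchange_explicit(&lock->slock, 1, memory_order_relaxed))
                ;

        /*
         * ACQUIRE: full fence (standing in for smp_mb()) so loads/stores
         * inside the critical section cannot move above the lock
         */
        atomic_thread_fence(memory_order_seq_cst);
}

static void sketch_spin_unlock(struct sketch_spinlock *lock)
{
        /*
         * RELEASE: full fence before the store, so accesses in the
         * critical section cannot move below the unlock
         */
        atomic_thread_fence(memory_order_seq_cst);

        atomic_store_explicit(&lock->slock, 0, memory_order_relaxed);
}

The "technically superfluous" smp_mb() before the spin loop in the patch has no analogue in this sketch; as the patch comment itself notes, only the barrier after acquiring the lock is required for ACQUIRE semantics.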