author		Vineet Gupta <vgupta@synopsys.com>			2014-11-20 05:12:09 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2015-07-21 13:10:02 -0400
commit		eb1eecd100ce48d4f8368a0c475ecb937abd40ec (patch)
tree		439f91910db9156bf3ffed7015231f608fab11c7 /arch
parent		f3ff4345ef597115869a227dbf738dde157f8521 (diff)
ARC: add smp barriers around atomics per Documentation/atomic_ops.txt
commit 2576c28e3f623ed401db7e6197241865328620ef upstream.
- arch_spin_lock/unlock were lacking the ACQUIRE/RELEASE barriers.
  Since ARCv2 only provides load/load, store/store and all/all barriers,
  we need the full barrier.
- LLOCK/SCOND based atomics, bitops and cmpxchg, which return modified
  values, were lacking the explicit smp barriers.
- Non LLOCK/SCOND variants don't need the explicit barriers since that
  is implicitly provided by the spin locks used to implement the
  critical section (the spin lock barriers are in turn also fixed in
  this commit, as explained above).
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
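
For orientation before reading the diff, the sketch below shows the barrier placement the patch applies to the value-returning LLOCK/SCOND paths: a full barrier on each side of the atomic sequence, since the LL/SC instructions themselves give no ordering. It is a standalone userspace illustration, not the kernel code: C11 seq_cst fences stand in for smp_mb(), a relaxed compare-exchange loop stands in for the LLOCK/SCOND retry loop, and the function name is made up.

#include <stdatomic.h>

/* Sketch of the "smp_mb() on both sides" pattern (illustrative names only). */
static inline int sketch_atomic_add_return(atomic_int *v, int i)
{
	int old, new;

	/* full barrier before: the LL/SC sequence itself provides no ordering */
	atomic_thread_fence(memory_order_seq_cst);

	do {
		old = atomic_load_explicit(v, memory_order_relaxed);
		new = old + i;
	} while (!atomic_compare_exchange_weak_explicit(v, &old, new,
							memory_order_relaxed,
							memory_order_relaxed));

	/* full barrier after: value-returning atomics must act as full barriers */
	atomic_thread_fence(memory_order_seq_cst);

	return new;
}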
Diffstat (limited to 'arch')
-rw-r--r--	arch/arc/include/asm/atomic.h	| 21
-rw-r--r--	arch/arc/include/asm/bitops.h	| 19
-rw-r--r--	arch/arc/include/asm/cmpxchg.h	| 17
-rw-r--r--	arch/arc/include/asm/spinlock.h	| 32
4 files changed, 89 insertions, 0 deletions
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 9917a45fc430..20b7dc17979e 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -43,6 +43,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
 { \
 	unsigned int temp; \
 	\
+	/* \
+	 * Explicit full memory barrier needed before/after as \
+	 * LLOCK/SCOND thmeselves don't provide any such semantics \
+	 */ \
+	smp_mb(); \
+	\
 	__asm__ __volatile__( \
 	"1: llock %0, [%1] \n" \
 	" " #asm_op " %0, %0, %2 \n" \
@@ -52,6 +58,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
 	: "r"(&v->counter), "ir"(i) \
 	: "cc"); \
 	\
+	smp_mb(); \
+	\
 	return temp; \
 }
 
@@ -105,6 +113,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
 	unsigned long flags; \
 	unsigned long temp; \
 	\
+	/* \
+	 * spin lock/unlock provides the needed smp_mb() before/after \
+	 */ \
 	atomic_ops_lock(flags); \
 	temp = v->counter; \
 	temp c_op i; \
@@ -142,9 +153,19 @@ ATOMIC_OP(and, &=, and)
 #define __atomic_add_unless(v, a, u) \
 ({ \
 	int c, old; \
+	\
+	/* \
+	 * Explicit full memory barrier needed before/after as \
+	 * LLOCK/SCOND thmeselves don't provide any such semantics \
+	 */ \
+	smp_mb(); \
+	\
 	c = atomic_read(v); \
 	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\
 		c = old; \
+	\
+	smp_mb(); \
+	\
 	c; \
 })
 
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 4051e9525939..624a9d048ca9 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -117,6 +117,12 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
 	if (__builtin_constant_p(nr))
 		nr &= 0x1f;
 
+	/*
+	 * Explicit full memory barrier needed before/after as
+	 * LLOCK/SCOND themselves don't provide any such semantics
+	 */
+	smp_mb();
+
 	__asm__ __volatile__(
 	"1: llock %0, [%2] \n"
 	" bset %1, %0, %3 \n"
@@ -126,6 +132,8 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
 	: "r"(m), "ir"(nr)
 	: "cc");
 
+	smp_mb();
+
 	return (old & (1 << nr)) != 0;
 }
 
@@ -139,6 +147,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
 	if (__builtin_constant_p(nr))
 		nr &= 0x1f;
 
+	smp_mb();
+
 	__asm__ __volatile__(
 	"1: llock %0, [%2] \n"
 	" bclr %1, %0, %3 \n"
@@ -148,6 +158,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
 	: "r"(m), "ir"(nr)
 	: "cc");
 
+	smp_mb();
+
 	return (old & (1 << nr)) != 0;
 }
 
@@ -161,6 +173,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m)
 	if (__builtin_constant_p(nr))
 		nr &= 0x1f;
 
+	smp_mb();
+
 	__asm__ __volatile__(
 	"1: llock %0, [%2] \n"
 	" bxor %1, %0, %3 \n"
@@ -170,6 +184,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m)
 	: "r"(m), "ir"(nr)
 	: "cc");
 
+	smp_mb();
+
 	return (old & (1 << nr)) != 0;
 }
 
@@ -249,6 +265,9 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
 	if (__builtin_constant_p(nr))
 		nr &= 0x1f;
 
+	/*
+	 * spin lock/unlock provide the needed smp_mb() before/after
+	 */
 	bitops_lock(flags);
 
 	old = *m;
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index 03cd6894855d..c9b1f461a587 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -10,6 +10,8 @@
 #define __ASM_ARC_CMPXCHG_H
 
 #include <linux/types.h>
+
+#include <asm/barrier.h>
 #include <asm/smp.h>
 
 #ifdef CONFIG_ARC_HAS_LLSC
@@ -19,6 +21,12 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 {
 	unsigned long prev;
 
+	/*
+	 * Explicit full memory barrier needed before/after as
+	 * LLOCK/SCOND thmeselves don't provide any such semantics
+	 */
+	smp_mb();
+
 	__asm__ __volatile__(
 	"1: llock %0, [%1] \n"
 	" brne %0, %2, 2f \n"
@@ -30,6 +38,8 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 	  "r"(new) /* can't be "ir". scond can't take limm for "b" */
 	: "cc");
 
+	smp_mb();
+
 	return prev;
 }
 
@@ -42,6 +52,9 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 	int prev;
 	volatile unsigned long *p = ptr;
 
+	/*
+	 * spin lock/unlock provide the needed smp_mb() before/after
+	 */
 	atomic_ops_lock(flags);
 	prev = *p;
 	if (prev == expected)
@@ -77,12 +90,16 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 
 	switch (size) {
 	case 4:
+		smp_mb();
+
 		__asm__ __volatile__(
 		" ex %0, [%1] \n"
 		: "+r"(val)
 		: "r"(ptr)
 		: "memory");
 
+		smp_mb();
+
 		return val;
 	}
 	return __xchg_bad_pointer();
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index b6a8c2dfbe6e..e1651df6a93d 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
 
+	/*
+	 * This smp_mb() is technically superfluous, we only need the one
+	 * after the lock for providing the ACQUIRE semantics.
+	 * However doing the "right" thing was regressing hackbench
+	 * so keeping this, pending further investigation
+	 */
+	smp_mb();
+
 	__asm__ __volatile__(
 	"1: ex %0, [%1] \n"
 	" breq %0, %2, 1b \n"
 	: "+&r" (tmp)
 	: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
 	: "memory");
+
+	/*
+	 * ACQUIRE barrier to ensure load/store after taking the lock
+	 * don't "bleed-up" out of the critical section (leak-in is allowed)
+	 * http://www.spinics.net/lists/kernel/msg2010409.html
+	 *
+	 * ARCv2 only has load-load, store-store and all-all barrier
+	 * thus need the full all-all barrier
+	 */
+	smp_mb();
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
 
+	smp_mb();
+
 	__asm__ __volatile__(
 	"1: ex %0, [%1] \n"
 	: "+r" (tmp)
 	: "r"(&(lock->slock))
 	: "memory");
 
+	smp_mb();
+
 	return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
 }
 
@@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
 
+	/*
+	 * RELEASE barrier: given the instructions avail on ARCv2, full barrier
+	 * is the only option
+	 */
+	smp_mb();
+
 	__asm__ __volatile__(
 	" ex %0, [%1] \n"
 	: "+r" (tmp)
 	: "r"(&(lock->slock))
 	: "memory");
 
+	/*
+	 * superfluous, but keeping for now - see pairing version in
+	 * arch_spin_lock above
+	 */
 	smp_mb();
 }
 