author      Peter Zijlstra <peterz@infradead.org>    2019-06-13 09:43:19 -0400
committer   Paul Burton <paul.burton@mips.com>       2019-08-31 06:05:17 -0400
commit      1c6c1ca318585f1096d4d04bc722297c85e9fb8a (patch)
tree        a3337041a74654c9427dbbaeca1a7a917db996e3
parent      dfc8d8de855d566eb83a27e58a69741de42a90da (diff)
mips/atomic: Fix loongson_llsc_mb() wreckage
The comment describing the loongson_llsc_mb() reorder case doesn't
make any sense whatsoever. Instruction re-ordering is not an SMP
artifact, but rather a CPU-local phenomenon. Clarify the comment by
explaining that these issues cause a coherence failure.
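As an illustration of the reordering case, here is a minimal sketch of
the barrier placement on a plain LL/SC sequence. It is illustrative
only: atomic_add_sketch() is a made-up name and the operand constraints
are simplified compared with the real kernel helpers, but
loongson_llsc_mb() and atomic_t are the kernel-internal definitions
this patch touches.

  static inline void atomic_add_sketch(int i, atomic_t *v)
  {
          int temp;

          loongson_llsc_mb();     /* order earlier accesses before the LL */
          __asm__ __volatile__(
          "1:     ll      %0, %1          # load-linked            \n"
          "       addu    %0, %2          # add the increment      \n"
          "       sc      %0, %1          # store-conditional      \n"
          "       beqz    %0, 1b          # retry if the SC failed \n"
          : "=&r" (temp), "+R" (v->counter)
          : "Ir" (i));
  }

The barrier has to sit between any earlier memory access and the LL on
every such sequence, whether or not the surrounding code is SMP-aware.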
For the branch speculation case: if futex_atomic_cmpxchg_inatomic()
needs a barrier at the bne branch target, then surely the normal
__cmpxchg_asm() implementation does too. We cannot rely on the
barriers from cmpxchg() because cmpxchg_local() is implemented with
the same macro, and branch prediction and speculation are likewise
CPU-local. A sketch of the resulting barrier placement follows.
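A similarly hedged sketch of the cmpxchg()-style shape the patch
enforces; cmpxchg_sketch() is a made-up name and the real
__cmpxchg_asm() uses .set noat and GCC_OFF_SMALL_ASM() constraints
rather than the simplified ones below. One barrier before the LL
covers the reordering case, and one after the loop sits at the bne
mismatch exit, so the speculation case is handled regardless of
whether the caller is cmpxchg() or cmpxchg_local().

  static inline unsigned long cmpxchg_sketch(volatile unsigned long *m,
                                             unsigned long old,
                                             unsigned long new)
  {
          unsigned long ret, tmp;

          loongson_llsc_mb();     /* reordering case: barrier before the LL */
          __asm__ __volatile__(
          "1:     ll      %0, %2          # load current value     \n"
          "       bne     %0, %3, 2f      # mismatch: exit loop    \n"
          "       move    %1, %4          # stage the new value    \n"
          "       sc      %1, %2          # store-conditional      \n"
          "       beqz    %1, 1b          # retry if the SC failed \n"
          "2:                                                      \n"
          : "=&r" (ret), "=&r" (tmp), "+R" (*m)
          : "r" (old), "r" (new)
          : "memory");
          loongson_llsc_mb();     /* speculation case: barrier at the bne target */

          return ret;
  }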
Fixes: e02e07e3127d ("MIPS: Loongson: Introduce and use loongson_llsc_mb()")
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Huang Pei <huangpei@loongson.cn>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
-rw-r--r--   arch/mips/include/asm/atomic.h    5
-rw-r--r--   arch/mips/include/asm/barrier.h   32
-rw-r--r--   arch/mips/include/asm/bitops.h    5
-rw-r--r--   arch/mips/include/asm/cmpxchg.h   5
-rw-r--r--   arch/mips/kernel/syscall.c        1
5 files changed, 32 insertions, 16 deletions
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 9a82dd11c0e9..190f1b615122 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -193,6 +193,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
         if (kernel_uses_llsc) {
                 int temp;
 
+                loongson_llsc_mb();
                 __asm__ __volatile__(
                 " .set push \n"
                 " .set "MIPS_ISA_LEVEL" \n"
@@ -200,12 +201,12 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
                 " .set pop \n"
                 " subu %0, %1, %3 \n"
                 " move %1, %0 \n"
-                " bltz %0, 1f \n"
+                " bltz %0, 2f \n"
                 " .set push \n"
                 " .set "MIPS_ISA_LEVEL" \n"
                 " sc %1, %2 \n"
                 "\t" __scbeqz " %1, 1b \n"
-                "1: \n"
+                "2: \n"
                 " .set pop \n"
                 : "=&r" (result), "=&r" (temp),
                   "+" GCC_OFF_SMALL_ASM() (v->counter)
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index b865e317a14f..f9a6da96aae1 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -238,36 +238,40 @@
 
 /*
  * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
- * store or pref) in between an ll & sc can cause the sc instruction to
+ * store or prefetch) in between an LL & SC can cause the SC instruction to
  * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
  * containing such sequences, this bug bites harder than we might otherwise
  * expect due to reordering & speculation:
  *
- * 1) A memory access appearing prior to the ll in program order may actually
- *    be executed after the ll - this is the reordering case.
+ * 1) A memory access appearing prior to the LL in program order may actually
+ *    be executed after the LL - this is the reordering case.
  *
- *    In order to avoid this we need to place a memory barrier (ie. a sync
- *    instruction) prior to every ll instruction, in between it & any earlier
- *    memory access instructions. Many of these cases are already covered by
- *    smp_mb__before_llsc() but for the remaining cases, typically ones in
- *    which multiple CPUs may operate on a memory location but ordering is not
- *    usually guaranteed, we use loongson_llsc_mb() below.
+ *    In order to avoid this we need to place a memory barrier (ie. a SYNC
+ *    instruction) prior to every LL instruction, in between it and any earlier
+ *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
- * 2) If a conditional branch exists between an ll & sc with a target outside
- *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
+ * 2) If a conditional branch exists between an LL & SC with a target outside
+ *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
- *    execution of memory accesses from outside of the ll-sc loop.
+ *    execution of memory accesses from outside of the LL-SC loop.
 *
- *    In order to avoid this we need a memory barrier (ie. a sync instruction)
+ *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target, for which we also use loongson_llsc_mb()
 *    defined below.
 *
 *    This case affects all current Loongson 3 CPUs.
+ *
+ * The above described cases cause an error in the cache coherence protocol;
+ * such that the Invalidate of a competing LL-SC goes 'missing' and SC
+ * erroneously observes its core still has Exclusive state and lets the SC
+ * proceed.
+ *
+ * Therefore the error only occurs on SMP systems.
 */
 #ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
-#define loongson_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#define loongson_llsc_mb() __asm__ __volatile__("sync" : : :"memory")
 #else
 #define loongson_llsc_mb() do { } while (0)
 #endif
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 9a466dde9b96..7bd35e5e2a9e 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -249,6 +249,7 @@ static inline int test_and_set_bit(unsigned long nr,
         unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
         unsigned long temp;
 
+        loongson_llsc_mb();
         do {
                 __asm__ __volatile__(
                 " .set push \n"
@@ -305,6 +306,7 @@ static inline int test_and_set_bit_lock(unsigned long nr,
         unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
         unsigned long temp;
 
+        loongson_llsc_mb();
         do {
                 __asm__ __volatile__(
                 " .set push \n"
@@ -364,6 +366,7 @@ static inline int test_and_clear_bit(unsigned long nr,
         unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
         unsigned long temp;
 
+        loongson_llsc_mb();
         do {
                 __asm__ __volatile__(
                 " " __LL "%0, %1 # test_and_clear_bit \n"
@@ -379,6 +382,7 @@ static inline int test_and_clear_bit(unsigned long nr,
         unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
         unsigned long temp;
 
+        loongson_llsc_mb();
         do {
                 __asm__ __volatile__(
                 " .set push \n"
@@ -438,6 +442,7 @@ static inline int test_and_change_bit(unsigned long nr,
         unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
         unsigned long temp;
 
+        loongson_llsc_mb();
         do {
                 __asm__ __volatile__(
                 " .set push \n"
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 59cf5cbb3883..2053a842d262 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -46,6 +46,7 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
         __typeof(*(m)) __ret; \
         \
         if (kernel_uses_llsc) { \
+                loongson_llsc_mb(); \
                 __asm__ __volatile__( \
                 " .set push \n" \
                 " .set noat \n" \
@@ -117,6 +118,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
         __typeof(*(m)) __ret; \
         \
         if (kernel_uses_llsc) { \
+                loongson_llsc_mb(); \
                 __asm__ __volatile__( \
                 " .set push \n" \
                 " .set noat \n" \
@@ -134,6 +136,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
                 : "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \
                 : GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new) \
                 : "memory"); \
+                loongson_llsc_mb(); \
         } else { \
                 unsigned long __flags; \
                 \
@@ -229,6 +232,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
          */
         local_irq_save(flags);
 
+        loongson_llsc_mb();
         asm volatile(
         " .set push \n"
         " .set " MIPS_ISA_ARCH_LEVEL " \n"
@@ -274,6 +278,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
           "r" (old),
           "r" (new)
         : "memory");
+        loongson_llsc_mb();
 
         local_irq_restore(flags);
         return ret;
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index b6dc78ad5d8c..b0e25e913bdb 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -132,6 +132,7 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
                   [efault] "i" (-EFAULT)
                 : "memory");
         } else if (cpu_has_llsc) {
+                loongson_llsc_mb();
                 __asm__ __volatile__ (
                 " .set push \n"
                 " .set "MIPS_ISA_ARCH_LEVEL" \n"