author	Peter Zijlstra <peterz@infradead.org>	2019-06-13 09:43:19 -0400
committer	Paul Burton <paul.burton@mips.com>	2019-08-31 06:05:17 -0400
commit	1c6c1ca318585f1096d4d04bc722297c85e9fb8a (patch)
tree	a3337041a74654c9427dbbaeca1a7a917db996e3
parent	dfc8d8de855d566eb83a27e58a69741de42a90da (diff)
mips/atomic: Fix loongson_llsc_mb() wreckage
The comment describing the loongson_llsc_mb() reorder case doesn't make any
sense whatsoever. Instruction reordering is not an SMP artifact, but rather a
CPU-local phenomenon. Clarify the comment by explaining that these issues
cause a coherence failure.

For the branch speculation case: if futex_atomic_cmpxchg_inatomic() needs a
barrier at the bne branch target, then surely the normal __cmpxchg_asm()
implementation does too. We cannot rely on the barriers from cmpxchg(),
because cmpxchg_local() is implemented with the same macro, and branch
prediction and speculation are, too, CPU local.

Fixes: e02e07e3127d ("MIPS: Loongson: Introduce and use loongson_llsc_mb()")
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Huang Pei <huangpei@loongson.cn>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
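For illustration, a minimal sketch of the barrier placement the patch ends up
enforcing in __cmpxchg_asm(). This is not the kernel macro itself: the helper
name sketch_cmpxchg() is hypothetical, only one operand size is handled, and
the real code's .set directives and GCC_OFF_SMALL_ASM()/"Jr" constraints are
replaced by plain register constraints.

	/* Sketch only: mirrors the CONFIG_CPU_LOONGSON3_WORKAROUNDS definition in barrier.h below. */
	#define loongson_llsc_mb()	__asm__ __volatile__("sync" : : : "memory")

	static inline unsigned int sketch_cmpxchg(volatile unsigned int *p,
						  unsigned int old, unsigned int new)
	{
		unsigned int ret, tmp;

		loongson_llsc_mb();		/* case 1: order earlier accesses before the LL */
		__asm__ __volatile__(
		"1:	ll	%0, 0(%3)	\n"	/* load-linked the current value      */
		"	bne	%0, %4, 2f	\n"	/* mismatch: branch out of the loop   */
		"	move	%1, %5		\n"
		"	sc	%1, 0(%3)	\n"	/* attempt store-conditional of new   */
		"	beqz	%1, 1b		\n"	/* SC failed: retry                   */
		"2:				\n"
		: "=&r" (ret), "=&r" (tmp), "+m" (*p)
		: "r" (p), "r" (old), "r" (new)
		: "memory");
		loongson_llsc_mb();		/* case 2: fence the 2: branch target */

		return ret;
	}

The first barrier covers the reordering case described in the barrier.h comment
below; the second covers the speculation case, since a mispredicted bne can let
memory accesses from beyond label 2: execute speculatively inside the LL/SC
window.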
-rw-r--r--	arch/mips/include/asm/atomic.h	5
-rw-r--r--	arch/mips/include/asm/barrier.h	32
-rw-r--r--	arch/mips/include/asm/bitops.h	5
-rw-r--r--	arch/mips/include/asm/cmpxchg.h	5
-rw-r--r--	arch/mips/kernel/syscall.c	1
5 files changed, 32 insertions, 16 deletions
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 9a82dd11c0e9..190f1b615122 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -193,6 +193,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 	if (kernel_uses_llsc) {
 		int temp;
 
+		loongson_llsc_mb();
 		__asm__ __volatile__(
 		"	.set	push					\n"
 		"	.set	"MIPS_ISA_LEVEL"			\n"
@@ -200,12 +201,12 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
200 " .set pop \n" 201 " .set pop \n"
201 " subu %0, %1, %3 \n" 202 " subu %0, %1, %3 \n"
202 " move %1, %0 \n" 203 " move %1, %0 \n"
203 " bltz %0, 1f \n" 204 " bltz %0, 2f \n"
204 " .set push \n" 205 " .set push \n"
205 " .set "MIPS_ISA_LEVEL" \n" 206 " .set "MIPS_ISA_LEVEL" \n"
206 " sc %1, %2 \n" 207 " sc %1, %2 \n"
207 "\t" __scbeqz " %1, 1b \n" 208 "\t" __scbeqz " %1, 1b \n"
208 "1: \n" 209 "2: \n"
209 " .set pop \n" 210 " .set pop \n"
210 : "=&r" (result), "=&r" (temp), 211 : "=&r" (result), "=&r" (temp),
211 "+" GCC_OFF_SMALL_ASM() (v->counter) 212 "+" GCC_OFF_SMALL_ASM() (v->counter)
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index b865e317a14f..f9a6da96aae1 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -238,36 +238,40 @@
 
 /*
  * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
- * store or pref) in between an ll & sc can cause the sc instruction to
+ * store or prefetch) in between an LL & SC can cause the SC instruction to
  * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
  * containing such sequences, this bug bites harder than we might otherwise
  * expect due to reordering & speculation:
  *
- * 1) A memory access appearing prior to the ll in program order may actually
- *    be executed after the ll - this is the reordering case.
+ * 1) A memory access appearing prior to the LL in program order may actually
+ *    be executed after the LL - this is the reordering case.
  *
- *    In order to avoid this we need to place a memory barrier (ie. a sync
- *    instruction) prior to every ll instruction, in between it & any earlier
- *    memory access instructions. Many of these cases are already covered by
- *    smp_mb__before_llsc() but for the remaining cases, typically ones in
- *    which multiple CPUs may operate on a memory location but ordering is not
- *    usually guaranteed, we use loongson_llsc_mb() below.
+ *    In order to avoid this we need to place a memory barrier (ie. a SYNC
+ *    instruction) prior to every LL instruction, in between it and any earlier
+ *    memory access instructions.
  *
  *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
  *
- * 2) If a conditional branch exists between an ll & sc with a target outside
- *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
+ * 2) If a conditional branch exists between an LL & SC with a target outside
+ *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
  *    or similar, then misprediction of the branch may allow speculative
- *    execution of memory accesses from outside of the ll-sc loop.
+ *    execution of memory accesses from outside of the LL-SC loop.
  *
- *    In order to avoid this we need a memory barrier (ie. a sync instruction)
+ *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
  *    at each affected branch target, for which we also use loongson_llsc_mb()
  *    defined below.
  *
  * This case affects all current Loongson 3 CPUs.
+ *
+ * The above described cases cause an error in the cache coherence protocol;
+ * such that the Invalidate of a competing LL-SC goes 'missing' and SC
+ * erroneously observes its core still has Exclusive state and lets the SC
+ * proceed.
+ *
+ * Therefore the error only occurs on SMP systems.
  */
 #ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
-#define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#define loongson_llsc_mb()	__asm__ __volatile__("sync" : : :"memory")
 #else
 #define loongson_llsc_mb()	do { } while (0)
 #endif
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 9a466dde9b96..7bd35e5e2a9e 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -249,6 +249,7 @@ static inline int test_and_set_bit(unsigned long nr,
 	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 	unsigned long temp;
 
+	loongson_llsc_mb();
 	do {
 		__asm__ __volatile__(
 		"	.set	push					\n"
@@ -305,6 +306,7 @@ static inline int test_and_set_bit_lock(unsigned long nr,
 	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 	unsigned long temp;
 
+	loongson_llsc_mb();
 	do {
 		__asm__ __volatile__(
 		"	.set	push					\n"
@@ -364,6 +366,7 @@ static inline int test_and_clear_bit(unsigned long nr,
 	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 	unsigned long temp;
 
+	loongson_llsc_mb();
 	do {
 		__asm__ __volatile__(
 		"	" __LL "%0, %1		# test_and_clear_bit	\n"
@@ -379,6 +382,7 @@ static inline int test_and_clear_bit(unsigned long nr,
 	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 	unsigned long temp;
 
+	loongson_llsc_mb();
 	do {
 		__asm__ __volatile__(
 		"	.set	push					\n"
@@ -438,6 +442,7 @@ static inline int test_and_change_bit(unsigned long nr,
 	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 	unsigned long temp;
 
+	loongson_llsc_mb();
 	do {
 		__asm__ __volatile__(
 		"	.set	push					\n"
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 59cf5cbb3883..2053a842d262 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -46,6 +46,7 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
 	__typeof(*(m)) __ret;						\
 									\
 	if (kernel_uses_llsc) {						\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
@@ -117,6 +118,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
 	__typeof(*(m)) __ret;						\
 									\
 	if (kernel_uses_llsc) {						\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
@@ -134,6 +136,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
134 : "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \ 136 : "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \
135 : GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new) \ 137 : GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new) \
136 : "memory"); \ 138 : "memory"); \
139 loongson_llsc_mb(); \
137 } else { \ 140 } else { \
138 unsigned long __flags; \ 141 unsigned long __flags; \
139 \ 142 \
@@ -229,6 +232,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
 	 */
 	local_irq_save(flags);
 
+	loongson_llsc_mb();
 	asm volatile(
 	"	.set	push				\n"
 	"	.set	" MIPS_ISA_ARCH_LEVEL "		\n"
@@ -274,6 +278,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
274 "r" (old), 278 "r" (old),
275 "r" (new) 279 "r" (new)
276 : "memory"); 280 : "memory");
281 loongson_llsc_mb();
277 282
278 local_irq_restore(flags); 283 local_irq_restore(flags);
279 return ret; 284 return ret;
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index b6dc78ad5d8c..b0e25e913bdb 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -132,6 +132,7 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
132 [efault] "i" (-EFAULT) 132 [efault] "i" (-EFAULT)
133 : "memory"); 133 : "memory");
134 } else if (cpu_has_llsc) { 134 } else if (cpu_has_llsc) {
135 loongson_llsc_mb();
135 __asm__ __volatile__ ( 136 __asm__ __volatile__ (
136 " .set push \n" 137 " .set push \n"
137 " .set "MIPS_ISA_ARCH_LEVEL" \n" 138 " .set "MIPS_ISA_ARCH_LEVEL" \n"