author		Vineet Gupta <vgupta@synopsys.com>	2015-07-14 10:20:18 -0400
committer	Vineet Gupta <vgupta@synopsys.com>	2015-08-03 23:56:34 -0400
commit		e78fdfef84be13a5c2b8276e12203cdf24778596 (patch)
tree		e16fad7f1d73a651d2f02deefd8f20f5bf9a018e
parent		69cbe630f54ec02efe47fdb9e257e617161da370 (diff)
ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff
This is to work around the llock/scond livelock.

HS38x4 could get into a LLOCK/SCOND livelock in case of multiple
overlapping coherency transactions in the SCU. The exclusive line state
keeps rotating among the contending cores, leading to a never-ending
cycle. So break the cycle by deferring the retry of a failed exclusive
access (SCOND). The actual delay needed is a function of the number of
contending cores as well as the unrelated coherency traffic from other
cores. To keep the code simple, start off with a small delay of 1,
which suffices in most cases, and double the delay on each contended
retry. Eventually the delay is long enough for the coherency pipeline
to drain, so a subsequent exclusive access succeeds.

Link: http://lkml.kernel.org/r/1438612568-28265-1-git-send-email-vgupta@synopsys.com
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
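In rough C terms, the deferred-retry policy the patch implements looks like
the sketch below. This is illustrative only: the kernel does it in inline
assembly (the SCOND_FAIL_RETRY_ASM macro in the diff) so the LLOCK/SCOND
sequence is not disturbed, and scond_backoff_delay() is a name invented here,
not part of the patch.

	/*
	 * Hypothetical C sketch of the backoff step: busy-wait for 'delay'
	 * iterations, then double 'delay' for the next failure, restarting
	 * at 1 on 32-bit overflow (mirrors the asl.f/mov.z pair in the asm).
	 */
	static inline void scond_backoff_delay(unsigned int *delay)
	{
		unsigned int tmp;

		for (tmp = *delay; tmp != 0; tmp--)	/* inner delay loop */
			;
		*delay <<= 1;				/* exponential growth */
		if (*delay == 0)			/* overflowed to zero */
			*delay = 1;			/* start over at 1 */
	}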
-rw-r--r--	arch/arc/Kconfig		5
-rw-r--r--	arch/arc/include/asm/atomic.h	49
-rw-r--r--	arch/arc/include/asm/spinlock.h	293
-rw-r--r--	arch/arc/kernel/setup.c		4
4 files changed, 347 insertions(+), 4 deletions(-)
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index a5fccdfbfc8f..bd4670d1b89b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -365,6 +365,11 @@ config ARC_HAS_LLSC
 	default y
 	depends on !ARC_CANT_LLSC
 
+config ARC_STAR_9000923308
+	bool "Workaround for llock/scond livelock"
+	default y
+	depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC
+
 config ARC_HAS_SWAPE
 	bool "Insn: SWAPE (endian-swap)"
 	default y
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 3dd36c1efee1..629dfd0a0c6b 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -23,17 +23,51 @@
 
 #define atomic_set(v, i) (((v)->counter) = (i))
 
+#ifdef CONFIG_ARC_STAR_9000923308
+
+#define SCOND_FAIL_RETRY_VAR_DEF						\
+	unsigned int delay = 1, tmp;						\
+
+#define SCOND_FAIL_RETRY_ASM							\
+	"	bz	4f			\n"				\
+	"   ; --- scond fail delay ---		\n"				\
+	"	mov	%[tmp], %[delay]	\n"	/* tmp = delay */	\
+	"2:	brne.d	%[tmp], 0, 2b		\n"	/* while (tmp != 0) */	\
+	"	sub	%[tmp], %[tmp], 1	\n"	/* tmp-- */		\
+	"	asl.f	%[delay], %[delay], 1	\n"	/* delay *= 2 */	\
+	"	mov.z	%[delay], 1		\n"	/* handle overflow */	\
+	"	b	1b			\n"	/* start over */	\
+	"4: ; --- success ---			\n"				\
+
+#define SCOND_FAIL_RETRY_VARS							\
+	  ,[delay] "+&r" (delay), [tmp] "=&r" (tmp)				\
+
+#else	/* !CONFIG_ARC_STAR_9000923308 */
+
+#define SCOND_FAIL_RETRY_VAR_DEF
+
+#define SCOND_FAIL_RETRY_ASM							\
+	"	bnz     1b			\n"				\
+
+#define SCOND_FAIL_RETRY_VARS
+
+#endif
+
 #define ATOMIC_OP(op, c_op, asm_op)					\
 static inline void atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned int val;						\
+	SCOND_FAIL_RETRY_VAR_DEF					\
 									\
 	__asm__ __volatile__(						\
 	"1:	llock   %[val], [%[ctr]]		\n"		\
 	"	" #asm_op " %[val], %[val], %[i]	\n"		\
 	"	scond   %[val], [%[ctr]]		\n"		\
-	"	bnz     1b				\n"		\
+	"						\n"		\
+	SCOND_FAIL_RETRY_ASM						\
+									\
 	: [val]	"=&r"	(val) /* Early clobber to prevent reg reuse */	\
+	  SCOND_FAIL_RETRY_VARS						\
 	: [ctr]	"r"	(&v->counter), /* Not "m": llock only supports reg direct addr mode */	\
 	  [i]	"ir"	(i)						\
 	: "cc");							\
@@ -42,7 +76,8 @@ static inline void atomic_##op(int i, atomic_t *v) \
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
 static inline int atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	unsigned int val;						\
+	SCOND_FAIL_RETRY_VAR_DEF					\
 									\
 	/*								\
 	 * Explicit full memory barrier needed before/after as		\
@@ -54,8 +89,11 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
54 "1: llock %[val], [%[ctr]] \n" \ 89 "1: llock %[val], [%[ctr]] \n" \
55 " " #asm_op " %[val], %[val], %[i] \n" \ 90 " " #asm_op " %[val], %[val], %[i] \n" \
56 " scond %[val], [%[ctr]] \n" \ 91 " scond %[val], [%[ctr]] \n" \
57 " bnz 1b \n" \ 92 " \n" \
93 SCOND_FAIL_RETRY_ASM \
94 \
58 : [val] "=&r" (val) \ 95 : [val] "=&r" (val) \
96 SCOND_FAIL_RETRY_VARS \
59 : [ctr] "r" (&v->counter), \ 97 : [ctr] "r" (&v->counter), \
60 [i] "ir" (i) \ 98 [i] "ir" (i) \
61 : "cc"); \ 99 : "cc"); \
@@ -142,6 +180,9 @@ ATOMIC_OP(and, &=, and)
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
 
 /**
  * __atomic_add_unless - add unless the number is a given value
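Taken together, each atomic op under the workaround behaves roughly like the
C sketch below. Here llock()/scond() are hypothetical intrinsics standing in
for the LLOCK/SCOND instructions (scond() returning non-zero on success), and
scond_backoff_delay() is the invented helper from the earlier sketch; the
real code is the asm macro in the diff above.

	/* Sketch: what atomic_add() amounts to with the workaround enabled */
	static inline void atomic_add_sketch(int i, atomic_t *v)
	{
		unsigned int delay = 1, val;

		for (;;) {
			val = llock(&v->counter);	/* load-locked */
			val += i;
			if (scond(val, &v->counter))	/* store-conditional */
				break;			/* exclusive store succeeded */
			scond_backoff_delay(&delay);	/* failed: back off, retry */
		}
	}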
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 9fd5a0221671..2a84525b5aa4 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -20,6 +20,11 @@
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
+/*
+ * A normal LLOCK/SCOND based system, w/o need for livelock workaround
+ */
+#ifndef CONFIG_ARC_STAR_9000923308
+
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned int val;
@@ -233,6 +238,294 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 	smp_mb();
 }
 
+#else	/* CONFIG_ARC_STAR_9000923308 */
+
+/*
+ * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
+ * coherency transactions in the SCU. The exclusive line state keeps rotating
+ * among the contending cores, leading to a never-ending cycle. So break the
+ * cycle by deferring the retry of a failed exclusive access (SCOND). The
+ * actual delay needed is a function of the number of contending cores as well
+ * as the unrelated coherency traffic from other cores. To keep the code
+ * simple, start off with a small delay of 1, which suffices in most cases,
+ * and double the delay on contention. Eventually the delay is long enough for
+ * the coherency pipeline to drain, so a subsequent exclusive access succeeds.
+ */
+
+#define SCOND_FAIL_RETRY_VAR_DEF						\
+	unsigned int delay, tmp;						\
+
+#define SCOND_FAIL_RETRY_ASM							\
+	"   ; --- scond fail delay ---		\n"				\
+	"	mov	%[tmp], %[delay]	\n"	/* tmp = delay */	\
+	"2:	brne.d	%[tmp], 0, 2b		\n"	/* while (tmp != 0) */	\
+	"	sub	%[tmp], %[tmp], 1	\n"	/* tmp-- */		\
+	"	asl.f	%[delay], %[delay], 1	\n"	/* delay *= 2 */	\
+	"	mov.z	%[delay], 1		\n"	/* handle overflow */	\
+	"	b	1b			\n"	/* start over */	\
+	"					\n"				\
+	"4: ; --- done ---			\n"				\
+
+#define SCOND_FAIL_RETRY_VARS							\
+	  ,[delay] "=&r" (delay), [tmp] "=&r" (tmp)				\
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned int val;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[slock]]	\n"
+	"	breq	%[val], %[LOCKED], 1b	\n"	/* spin while LOCKED */
+	"	scond	%[LOCKED], [%[slock]]	\n"	/* acquire */
+	"	bz	4f			\n"	/* done */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val)
+	  SCOND_FAIL_RETRY_VARS
+	: [slock]	"r"	(&(lock->slock)),
+	  [LOCKED]	"r"	(__ARCH_SPIN_LOCK_LOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned int val, got_it = 0;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[slock]]	\n"
+	"	breq	%[val], %[LOCKED], 4f	\n"	/* already LOCKED, just bail */
+	"	scond	%[LOCKED], [%[slock]]	\n"	/* acquire */
+	"	bz.d	4f			\n"
+	"	mov.z	%[got_it], 1		\n"	/* got it */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	  SCOND_FAIL_RETRY_VARS
+	: [slock]	"r"	(&(lock->slock)),
+	  [LOCKED]	"r"	(__ARCH_SPIN_LOCK_LOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	smp_mb();
+
+	lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+	smp_mb();
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ * Unfair locking as Writers could be starved indefinitely by Reader(s)
+ */
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	/*
+	 * zero means writer holds the lock exclusively, deny Reader.
+	 * Otherwise grant lock to first/subseq reader
+	 *
+	 *	if (rw->counter > 0) {
+	 *		rw->counter--;
+	 *		ret = 1;
+	 *	}
+	 */
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brls	%[val], %[WR_LOCKED], 1b\n"	/* <= 0: spin while write locked */
+	"	sub	%[val], %[val], 1	\n"	/* reader lock */
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz	4f			\n"	/* done */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned int val, got_it = 0;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brls	%[val], %[WR_LOCKED], 4f\n"	/* <= 0: already write locked, bail */
+	"	sub	%[val], %[val], 1	\n"	/* counter-- */
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz.d	4f			\n"
+	"	mov.z	%[got_it], 1		\n"	/* got it */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	/*
+	 * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
+	 * deny writer. Otherwise if unlocked grant to writer
+	 * Hence the claim that Linux rwlocks are unfair to writers.
+	 * (can be starved for an indefinite time by readers).
+	 *
+	 *	if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
+	 *		rw->counter = 0;
+	 *		ret = 1;
+	 *	}
+	 */
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brne	%[val], %[UNLOCKED], 1b	\n"	/* while !UNLOCKED spin */
+	"	mov	%[val], %[WR_LOCKED]	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz	4f			\n"
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"ir"	(__ARCH_RW_LOCK_UNLOCKED__),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned int val, got_it = 0;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brne	%[val], %[UNLOCKED], 4f	\n"	/* !UNLOCKED, bail */
+	"	mov	%[val], %[WR_LOCKED]	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz.d	4f			\n"
+	"	mov.z	%[got_it], 1		\n"	/* got it */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"ir"	(__ARCH_RW_LOCK_UNLOCKED__),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+
+	smp_mb();
+
+	/*
+	 * rw->counter++;
+	 */
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	add	%[val], %[val], 1	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bnz	1b			\n"
+	"					\n"
+	: [val]		"=&r"	(val)
+	: [rwlock]	"r"	(&(rw->counter))
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+
+	smp_mb();
+
+	/*
+	 * rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
+	 */
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	scond	%[UNLOCKED], [%[rwlock]]\n"
+	"	bnz	1b			\n"
+	"					\n"
+	: [val]		"=&r"	(val)
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"r"	(__ARCH_RW_LOCK_UNLOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
+
+#endif	/* CONFIG_ARC_STAR_9000923308 */
+
 #else	/* !CONFIG_ARC_HAS_LLSC */
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)
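For orientation, arch_spin_trylock() above reduces to roughly the following C
flow, using the same hypothetical llock()/scond() intrinsics and invented
backoff helper as the earlier sketches; arch_spin_lock() differs only in
spinning on a held lock (the breq ... 1b) instead of bailing out.

	static inline int arch_spin_trylock_sketch(arch_spinlock_t *lock)
	{
		unsigned int delay = 1, val;

		for (;;) {
			val = llock(&lock->slock);
			if (val == __ARCH_SPIN_LOCK_LOCKED__)
				return 0;			/* already held: bail */
			if (scond(__ARCH_SPIN_LOCK_LOCKED__, &lock->slock))
				return 1;			/* acquired */
			scond_backoff_delay(&delay);		/* SCOND lost the race */
		}
	}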
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index f2f771bd3ede..cabde9dc0696 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -336,6 +336,10 @@ static void arc_chk_core_config(void)
 		pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
 	else if (!cpu->extn.fpu_dp && fpu_enabled)
 		panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
+
+	if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
+	    !IS_ENABLED(CONFIG_ARC_STAR_9000923308))
+		panic("llock/scond livelock workaround missing\n");
 }
 
 /*