Diffstat (limited to 'include/linux/spinlock.h')
-rw-r--r--   include/linux/spinlock.h | 70
1 file changed, 50 insertions(+), 20 deletions(-)
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index fd57888d4942..e089157dcf97 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -114,29 +114,48 @@ do { \
 #endif /*arch_spin_is_contended*/
 
 /*
- * This barrier must provide two things:
+ * smp_mb__after_spinlock() provides the equivalent of a full memory barrier
+ * between program-order earlier lock acquisitions and program-order later
+ * memory accesses.
  *
- * - it must guarantee a STORE before the spin_lock() is ordered against a
- *   LOAD after it, see the comments at its two usage sites.
+ * This guarantees that the following two properties hold:
  *
- * - it must ensure the critical section is RCsc.
+ *   1) Given the snippet:
  *
- * The latter is important for cases where we observe values written by other
- * CPUs in spin-loops, without barriers, while being subject to scheduling.
+ *        { X = 0;  Y = 0; }
  *
- * CPU0                 CPU1                    CPU2
+ *        CPU0                          CPU1
  *
- *                      for (;;) {
- *                        if (READ_ONCE(X))
- *                          break;
- *                      }
- * X=1
- *                      <sched-out>
- *                                              <sched-in>
- *                                              r = X;
+ *        WRITE_ONCE(X, 1);             WRITE_ONCE(Y, 1);
+ *        spin_lock(S);                 smp_mb();
+ *        smp_mb__after_spinlock();     r1 = READ_ONCE(X);
+ *        r0 = READ_ONCE(Y);
+ *        spin_unlock(S);
  *
- * without transitivity it could be that CPU1 observes X!=0 breaks the loop,
- * we get migrated and CPU2 sees X==0.
+ *      it is forbidden that CPU0 does not observe CPU1's store to Y (r0 = 0)
+ *      and CPU1 does not observe CPU0's store to X (r1 = 0); see the comments
+ *      preceding the call to smp_mb__after_spinlock() in __schedule() and in
+ *      try_to_wake_up().
+ *
+ *   2) Given the snippet:
+ *
+ *        { X = 0;  Y = 0; }
+ *
+ *        CPU0                CPU1                          CPU2
+ *
+ *        spin_lock(S);       spin_lock(S);                 r1 = READ_ONCE(Y);
+ *        WRITE_ONCE(X, 1);   smp_mb__after_spinlock();     smp_rmb();
+ *        spin_unlock(S);     r0 = READ_ONCE(X);            r2 = READ_ONCE(X);
+ *                            WRITE_ONCE(Y, 1);
+ *                            spin_unlock(S);
+ *
+ *      it is forbidden that CPU0's critical section executes before CPU1's
+ *      critical section (r0 = 1), CPU2 observes CPU1's store to Y (r1 = 1)
+ *      and CPU2 does not observe CPU0's store to X (r2 = 0); see the comments
+ *      preceding the calls to smp_rmb() in try_to_wake_up() for similar
+ *      snippets but "projected" onto two CPUs.
+ *
+ * Property (2) upgrades the lock to an RCsc lock.
  *
  * Since most load-store architectures implement ACQUIRE with an smp_mb() after
  * the LL/SC loop, they need no further barriers. Similarly all our TSO
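The property (1) litmus test above can also be read as ordinary kernel C. What follows is an illustrative sketch only, not part of this patch; the globals X and Y, the lock S and the two thread functions are made-up names used to show the pairing of smp_mb__after_spinlock() with the plain smp_mb() on the other side:

/*
 * Hedged sketch of the store-buffering pattern forbidden by property (1).
 * Names are hypothetical; only the barrier/lock calls are real kernel APIs.
 */
#include <linux/spinlock.h>
#include <linux/compiler.h>

static DEFINE_SPINLOCK(S);
static int X, Y;

static void cpu0_side(void)		/* runs on CPU0 */
{
	int r0;

	WRITE_ONCE(X, 1);
	spin_lock(&S);
	/* With the spin_lock() above, acts as a full barrier between the
	 * store to X and the load of Y (property (1)). */
	smp_mb__after_spinlock();
	r0 = READ_ONCE(Y);
	spin_unlock(&S);
	/* If r0 == 0 here, CPU1 must have observed X == 1 (r1 == 1). */
}

static void cpu1_side(void)		/* runs on CPU1 */
{
	int r1;

	WRITE_ONCE(Y, 1);
	smp_mb();	/* pairs with smp_mb__after_spinlock() on CPU0 */
	r1 = READ_ONCE(X);
	/* If r1 == 0 here, CPU0 must have observed Y == 1 (r0 == 1). */
}

The forbidden outcome is r0 == 0 && r1 == 0, exactly as stated in the new comment; this is the pattern relied upon around __schedule() and try_to_wake_up().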
@@ -432,9 +451,20 @@ extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
 #define atomic_dec_and_lock_irqsave(atomic, lock, flags) \
 		__cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)))
 
-int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
-			   size_t max_size, unsigned int cpu_mult,
-			   gfp_t gfp);
+int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
+			     size_t max_size, unsigned int cpu_mult,
+			     gfp_t gfp, const char *name,
+			     struct lock_class_key *key);
+
+#define alloc_bucket_spinlocks(locks, lock_mask, max_size, cpu_mult, gfp)    \
+	({								     \
+		static struct lock_class_key key;			     \
+		int ret;						     \
+									     \
+		ret = __alloc_bucket_spinlocks(locks, lock_mask, max_size,   \
+					       cpu_mult, gfp, #locks, &key); \
+		ret;							     \
+	})
 
 void free_bucket_spinlocks(spinlock_t *locks);
 
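With the new wrapper, callers keep the existing alloc_bucket_spinlocks() interface while lockdep gets a distinct class per call site: the macro supplies a static lock_class_key and the stringified locks argument to __alloc_bucket_spinlocks(). A hedged usage sketch follows; the table structure, function names and sizes are made up for illustration:

/*
 * Hypothetical caller of alloc_bucket_spinlocks(); only the
 * alloc/free helpers themselves come from this header.
 */
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_table {
	spinlock_t *bucket_locks;	/* array of bucket locks */
	unsigned int lock_mask;		/* maps a hash to a lock index */
};

static int my_table_init(struct my_table *t, gfp_t gfp)
{
	/*
	 * Expands to __alloc_bucket_spinlocks(..., "&t->bucket_locks", &key)
	 * with a lock_class_key unique to this call site. The sizes here
	 * (cap of 1024 locks, cpu_mult of 4) are arbitrary.
	 */
	return alloc_bucket_spinlocks(&t->bucket_locks, &t->lock_mask,
				      1024, 4, gfp);
}

static spinlock_t *my_table_bucket_lock(struct my_table *t, u32 hash)
{
	return &t->bucket_locks[hash & t->lock_mask];
}

static void my_table_destroy(struct my_table *t)
{
	free_bucket_spinlocks(t->bucket_locks);
}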
