Diffstat (limited to 'include/linux/spinlock.h')
-rw-r--r--   include/linux/spinlock.h | 70
1 file changed, 50 insertions(+), 20 deletions(-)
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index fd57888d4942..e089157dcf97 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -114,29 +114,48 @@ do { \
 #endif /*arch_spin_is_contended*/
 
 /*
- * This barrier must provide two things:
+ * smp_mb__after_spinlock() provides the equivalent of a full memory barrier
+ * between program-order earlier lock acquisitions and program-order later
+ * memory accesses.
  *
- * - it must guarantee a STORE before the spin_lock() is ordered against a
- *   LOAD after it, see the comments at its two usage sites.
+ * This guarantees that the following two properties hold:
  *
- * - it must ensure the critical section is RCsc.
+ *   1) Given the snippet:
  *
- * The latter is important for cases where we observe values written by other
- * CPUs in spin-loops, without barriers, while being subject to scheduling.
+ *        { X = 0;  Y = 0; }
  *
- * CPU0                 CPU1                    CPU2
+ *        CPU0                          CPU1
  *
- *                      for (;;) {
- *                        if (READ_ONCE(X))
- *                          break;
- *                      }
- * X=1
- *                      <sched-out>
- *                                              <sched-in>
- *                                              r = X;
+ *        WRITE_ONCE(X, 1);             WRITE_ONCE(Y, 1);
+ *        spin_lock(S);                 smp_mb();
+ *        smp_mb__after_spinlock();     r1 = READ_ONCE(X);
+ *        r0 = READ_ONCE(Y);
+ *        spin_unlock(S);
  *
- * without transitivity it could be that CPU1 observes X!=0 breaks the loop,
- * we get migrated and CPU2 sees X==0.
+ *      it is forbidden that CPU0 does not observe CPU1's store to Y (r0 = 0)
+ *      and CPU1 does not observe CPU0's store to X (r1 = 0); see the comments
+ *      preceding the call to smp_mb__after_spinlock() in __schedule() and in
+ *      try_to_wake_up().
+ *
+ *   2) Given the snippet:
+ *
+ *        { X = 0;  Y = 0; }
+ *
+ *        CPU0                CPU1                          CPU2
+ *
+ *        spin_lock(S);       spin_lock(S);                 r1 = READ_ONCE(Y);
+ *        WRITE_ONCE(X, 1);   smp_mb__after_spinlock();     smp_rmb();
+ *        spin_unlock(S);     r0 = READ_ONCE(X);            r2 = READ_ONCE(X);
+ *                            WRITE_ONCE(Y, 1);
+ *                            spin_unlock(S);
+ *
+ *      it is forbidden that CPU0's critical section executes before CPU1's
+ *      critical section (r0 = 1), CPU2 observes CPU1's store to Y (r1 = 1)
+ *      and CPU2 does not observe CPU0's store to X (r2 = 0); see the comments
+ *      preceding the calls to smp_rmb() in try_to_wake_up() for similar
+ *      snippets but "projected" onto two CPUs.
+ *
+ * Property (2) upgrades the lock to an RCsc lock.
  *
  * Since most load-store architectures implement ACQUIRE with an smp_mb() after
  * the LL/SC loop, they need no further barriers. Similarly all our TSO
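The property (1) litmus test above can also be read as ordinary kernel C. What follows is an illustrative sketch only, not part of this patch; the globals X and Y, the lock S and the two thread functions are made-up names used to show the pairing of smp_mb__after_spinlock() with the plain smp_mb() on the other side:

/*
 * Hedged sketch of the store-buffering pattern forbidden by property (1).
 * Names are hypothetical; only the barrier/lock calls are real kernel APIs.
 */
#include <linux/spinlock.h>
#include <linux/compiler.h>

static DEFINE_SPINLOCK(S);
static int X, Y;

static void cpu0_side(void)		/* runs on CPU0 */
{
	int r0;

	WRITE_ONCE(X, 1);
	spin_lock(&S);
	/* With the spin_lock() above, acts as a full barrier between the
	 * store to X and the load of Y (property (1)). */
	smp_mb__after_spinlock();
	r0 = READ_ONCE(Y);
	spin_unlock(&S);
	/* If r0 == 0 here, CPU1 must have observed X == 1 (r1 == 1). */
}

static void cpu1_side(void)		/* runs on CPU1 */
{
	int r1;

	WRITE_ONCE(Y, 1);
	smp_mb();	/* pairs with smp_mb__after_spinlock() on CPU0 */
	r1 = READ_ONCE(X);
	/* If r1 == 0 here, CPU0 must have observed Y == 1 (r0 == 1). */
}

The forbidden outcome is r0 == 0 && r1 == 0, exactly as stated in the new comment; this is the pattern relied upon around __schedule() and try_to_wake_up().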
@@ -432,9 +451,20 @@ extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
 #define atomic_dec_and_lock_irqsave(atomic, lock, flags) \
 		__cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)))
 
-int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
-			   size_t max_size, unsigned int cpu_mult,
-			   gfp_t gfp);
+int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
+			     size_t max_size, unsigned int cpu_mult,
+			     gfp_t gfp, const char *name,
+			     struct lock_class_key *key);
+
+#define alloc_bucket_spinlocks(locks, lock_mask, max_size, cpu_mult, gfp)    \
+	({								     \
+		static struct lock_class_key key;			     \
+		int ret;						     \
+									     \
+		ret = __alloc_bucket_spinlocks(locks, lock_mask, max_size,   \
+					       cpu_mult, gfp, #locks, &key); \
+		ret;							     \
+	})
 
 void free_bucket_spinlocks(spinlock_t *locks);
 
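With the new wrapper, callers keep the existing alloc_bucket_spinlocks() interface while lockdep gets a distinct class per call site: the macro supplies a static lock_class_key and the stringified locks argument to __alloc_bucket_spinlocks(). A hedged usage sketch follows; the table structure, function names and sizes are made up for illustration:

/*
 * Hypothetical caller of alloc_bucket_spinlocks(); only the
 * alloc/free helpers themselves come from this header.
 */
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_table {
	spinlock_t *bucket_locks;	/* array of bucket locks */
	unsigned int lock_mask;		/* maps a hash to a lock index */
};

static int my_table_init(struct my_table *t, gfp_t gfp)
{
	/*
	 * Expands to __alloc_bucket_spinlocks(..., "&t->bucket_locks", &key)
	 * with a lock_class_key unique to this call site. The sizes here
	 * (cap of 1024 locks, cpu_mult of 4) are arbitrary.
	 */
	return alloc_bucket_spinlocks(&t->bucket_locks, &t->lock_mask,
				      1024, 4, gfp);
}

static spinlock_t *my_table_bucket_lock(struct my_table *t, u32 hash)
{
	return &t->bucket_locks[hash & t->lock_mask];
}

static void my_table_destroy(struct my_table *t)
{
	free_bucket_spinlocks(t->bucket_locks);
}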
