rcu,locking: Privatize smp_mb__after_unlock_lock()

RCU is the only thing that uses smp_mb__after_unlock_lock(), and is likely the only thing that ever will use it, so this commit makes this macro private to RCU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: "linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>
author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2015-07-14 21:35:23 -0400
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2015-08-04 11:49:21 -0400
commit: 12d560f4ea87030667438a169912380be00cea4b (patch)
tree: 3b60a7b97e849bd68573db48dd8608cb43f05694
parent: 3dbe43f6fba9f2a0e46e371733575a45704c22ab (diff)
4 files changed, 16 insertions, 79 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 318523872db5..eafa6a53f72c 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1854,16 +1854,10 @@ RELEASE are to the same lock variable, but only from the perspective of
 another CPU not holding that lock.  In short, a ACQUIRE followed by an
 RELEASE may -not- be assumed to be a full memory barrier.
-Similarly, the reverse case of a RELEASE followed by an ACQUIRE does not
+Similarly, the reverse case of a RELEASE followed by an ACQUIRE does
-imply a full memory barrier.  If it is necessary for a RELEASE-ACQUIRE
+not imply a full memory barrier.  Therefore, the CPU's execution of the
-pair to produce a full barrier, the ACQUIRE can be followed by an
+critical sections corresponding to the RELEASE and the ACQUIRE can cross,
-smp_mb__after_unlock_lock() invocation.  This will produce a full barrier
+so that:
-(including transitivity) if either (a) the RELEASE and the ACQUIRE are
-executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on
-the same variable.  The smp_mb__after_unlock_lock() primitive is free
-on many architectures.  Without smp_mb__after_unlock_lock(), the CPU's
-execution of the critical sections corresponding to the RELEASE and the
-ACQUIRE can cross, so that:
        *A = a;
        RELEASE M
@@ -1901,29 +1895,6 @@ the RELEASE would simply complete, thereby avoiding the deadlock.
        a sleep-unlock race, but the locking primitive needs to resolve
        such races properly in any case.
-With smp_mb__after_unlock_lock(), the two critical sections cannot overlap.
-For example, with the following code, the store to *A will always be
-seen by other CPUs before the store to *B:
-        *A = a;
-        RELEASE M
-        ACQUIRE N
-        smp_mb__after_unlock_lock();
-        *B = b;
-The operations will always occur in one of the following orders:
-        STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B
-        STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B
-        ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B
-If the RELEASE and ACQUIRE were instead both operating on the same lock
-variable, only the first of these alternatives can occur.  In addition,
-the more strongly ordered systems may rule out some of the above orders.
-But in any case, as noted earlier, the smp_mb__after_unlock_lock()
-ensures that the store to *A will always be seen as happening before
-the store to *B.
 Locks and semaphores may not provide any guarantee of ordering on UP compiled
 systems, and so cannot be counted on in such a situation to actually achieve
 anything at all - especially with respect to I/O accesses - unless combined
@@ -2154,40 +2125,6 @@ But it won't see any of:
        *E, *F or *G following RELEASE Q
-However, if the following occurs:
-        CPU 1                           CPU 2
-        =============================== ===============================
-        WRITE_ONCE(*A, a);
-        ACQUIRE M                    [1]
-        WRITE_ONCE(*B, b);
-        WRITE_ONCE(*C, c);
-        RELEASE M            [1]
-        WRITE_ONCE(*D, d);              WRITE_ONCE(*E, e);
-                                        ACQUIRE M                    [2]
-                                        smp_mb__after_unlock_lock();
-                                        WRITE_ONCE(*F, f);
-                                        WRITE_ONCE(*G, g);
-                                        RELEASE M            [2]
-                                        WRITE_ONCE(*H, h);
-CPU 3 might see:
-        *E, ACQUIRE M [1], *C, *B, *A, RELEASE M [1],
-                ACQUIRE M [2], *H, *F, *G, RELEASE M [2], *D
-But assuming CPU 1 gets the lock first, CPU 3 won't see any of:
-        *B, *C, *D, *F, *G or *H preceding ACQUIRE M [1]
-        *A, *B or *C following RELEASE M [1]
-        *F, *G or *H preceding ACQUIRE M [2]
-        *A, *B, *C, *E, *F or *G following RELEASE M [2]
-Note that the smp_mb__after_unlock_lock() is critically important
-here: Without it CPU 3 might see some of the above orderings.
-Without smp_mb__after_unlock_lock(), the accesses are not guaranteed
-to be seen in order unless CPU 3 holds lock M.
 ACQUIRES VS I/O ACCESSES
 ------------------------
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 4dbe072eecbe..523673d7583c 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -28,8 +28,6 @@
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
-#define smp_mb__after_unlock_lock()     smp_mb()  /* Full ordering for lock. */
 #ifdef CONFIG_PPC64
 /* use 0x800000yy when locked, where yy == CPU number */
 #ifdef __BIG_ENDIAN__
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 0063b24b4f36..16c5ed5a627c 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -130,16 +130,6 @@ do {								\
 #define smp_mb__before_spinlock()       smp_wmb()
 #endif
-/*
- * Place this after a lock-acquisition primitive to guarantee that
- * an UNLOCK+LOCK pair act as a full barrier.  This guarantee applies
- * if the UNLOCK and LOCK are executed by the same CPU or if the
- * UNLOCK and LOCK operate on the same lock variable.
- */
-#ifndef smp_mb__after_unlock_lock
-#define smp_mb__after_unlock_lock()     do { } while (0)
-#endif
 /**
 * raw_spin_unlock_wait - wait until the spinlock gets unlocked
 * @lock: the spinlock in question.
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 0412030ca882..2e991f8361e4 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -653,3 +653,15 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 }
 #endif /* #ifdef CONFIG_RCU_TRACE */
+/*
+ * Place this after a lock-acquisition primitive to guarantee that
+ * an UNLOCK+LOCK pair act as a full barrier.  This guarantee applies
+ * if the UNLOCK and LOCK are executed by the same CPU or if the
+ * UNLOCK and LOCK operate on the same lock variable.
+ */
+#ifdef CONFIG_PPC
+#define smp_mb__after_unlock_lock()     smp_mb()  /* Full ordering for lock. */
+#else /* #ifdef CONFIG_PPC */
+#define smp_mb__after_unlock_lock()     do { } while (0)
+#endif /* #else #ifdef CONFIG_PPC */
author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-07-14 21:35:23 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-08-04 11:49:21 -0400
commit	12d560f4ea87030667438a169912380be00cea4b (patch)
tree	3b60a7b97e849bd68573db48dd8608cb43f05694
parent	3dbe43f6fba9f2a0e46e371733575a45704c22ab (diff)

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 318523872db5..eafa6a53f72c 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt
@@ -1854,16 +1854,10 @@ RELEASE are to the same lock variable, but only from the perspective of
1854	another CPU not holding that lock. In short, a ACQUIRE followed by an	1854	another CPU not holding that lock. In short, a ACQUIRE followed by an
1855	RELEASE may -not- be assumed to be a full memory barrier.	1855	RELEASE may -not- be assumed to be a full memory barrier.
1856		1856
1857	Similarly, the reverse case of a RELEASE followed by an ACQUIRE does not	1857	Similarly, the reverse case of a RELEASE followed by an ACQUIRE does
1858	imply a full memory barrier. If it is necessary for a RELEASE-ACQUIRE	1858	not imply a full memory barrier. Therefore, the CPU's execution of the
1859	pair to produce a full barrier, the ACQUIRE can be followed by an	1859	critical sections corresponding to the RELEASE and the ACQUIRE can cross,
1860	smp_mb__after_unlock_lock() invocation. This will produce a full barrier	1860	so that:
1861	(including transitivity) if either (a) the RELEASE and the ACQUIRE are
1862	executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on
1863	the same variable. The smp_mb__after_unlock_lock() primitive is free
1864	on many architectures. Without smp_mb__after_unlock_lock(), the CPU's
1865	execution of the critical sections corresponding to the RELEASE and the
1866	ACQUIRE can cross, so that:
1867		1861
1868	*A = a;	1862	*A = a;
1869	RELEASE M	1863	RELEASE M
@@ -1901,29 +1895,6 @@ the RELEASE would simply complete, thereby avoiding the deadlock.
1901	a sleep-unlock race, but the locking primitive needs to resolve	1895	a sleep-unlock race, but the locking primitive needs to resolve
1902	such races properly in any case.	1896	such races properly in any case.
1903		1897
1904	With smp_mb__after_unlock_lock(), the two critical sections cannot overlap.
1905	For example, with the following code, the store to *A will always be
1906	seen by other CPUs before the store to *B:
1907
1908	*A = a;
1909	RELEASE M
1910	ACQUIRE N
1911	smp_mb__after_unlock_lock();
1912	*B = b;
1913
1914	The operations will always occur in one of the following orders:
1915
1916	STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B
1917	STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B
1918	ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B
1919
1920	If the RELEASE and ACQUIRE were instead both operating on the same lock
1921	variable, only the first of these alternatives can occur. In addition,
1922	the more strongly ordered systems may rule out some of the above orders.
1923	But in any case, as noted earlier, the smp_mb__after_unlock_lock()
1924	ensures that the store to *A will always be seen as happening before
1925	the store to *B.
1926
1927	Locks and semaphores may not provide any guarantee of ordering on UP compiled	1898	Locks and semaphores may not provide any guarantee of ordering on UP compiled
1928	systems, and so cannot be counted on in such a situation to actually achieve	1899	systems, and so cannot be counted on in such a situation to actually achieve
1929	anything at all - especially with respect to I/O accesses - unless combined	1900	anything at all - especially with respect to I/O accesses - unless combined
@@ -2154,40 +2125,6 @@ But it won't see any of:
2154	*E, *F or *G following RELEASE Q	2125	*E, *F or *G following RELEASE Q
2155		2126
2156		2127
2157	However, if the following occurs:
2158
2159	CPU 1                           CPU 2
2160	=============================== ===============================
2161	WRITE_ONCE(*A, a);
2162	ACQUIRE M [1]
2163	WRITE_ONCE(*B, b);
2164	WRITE_ONCE(*C, c);
2165	RELEASE M [1]
2166	WRITE_ONCE(*D, d);              WRITE_ONCE(*E, e);
2167	                                ACQUIRE M [2]
2168	                                smp_mb__after_unlock_lock();
2169	                                WRITE_ONCE(*F, f);
2170	                                WRITE_ONCE(*G, g);
2171	                                RELEASE M [2]
2172	                                WRITE_ONCE(*H, h);
2173
2174	CPU 3 might see:
2175
2176	*E, ACQUIRE M [1], *C, *B, *A, RELEASE M [1],
2177	ACQUIRE M [2], *H, *F, *G, RELEASE M [2], *D
2178
2179	But assuming CPU 1 gets the lock first, CPU 3 won't see any of:
2180
2181	*B, *C, *D, *F, *G or *H preceding ACQUIRE M [1]
2182	*A, *B or *C following RELEASE M [1]
2183	*F, *G or *H preceding ACQUIRE M [2]
2184	*A, *B, *C, *E, *F or *G following RELEASE M [2]
2185
2186	Note that the smp_mb__after_unlock_lock() is critically important
2187	here: Without it CPU 3 might see some of the above orderings.
2188	Without smp_mb__after_unlock_lock(), the accesses are not guaranteed
2189	to be seen in order unless CPU 3 holds lock M.
2190
2191		2128
2192	ACQUIRES VS I/O ACCESSES	2129	ACQUIRES VS I/O ACCESSES
2193	------------------------	2130	------------------------


diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 4dbe072eecbe..523673d7583c 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h
@@ -28,8 +28,6 @@
28	#include <asm/synch.h>	28	#include <asm/synch.h>
29	#include <asm/ppc-opcode.h>	29	#include <asm/ppc-opcode.h>
30		30
31	#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
32
33	#ifdef CONFIG_PPC64	31	#ifdef CONFIG_PPC64
34	/* use 0x800000yy when locked, where yy == CPU number */	32	/* use 0x800000yy when locked, where yy == CPU number */
35	#ifdef __BIG_ENDIAN__	33	#ifdef __BIG_ENDIAN__


diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 0063b24b4f36..16c5ed5a627c 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h
@@ -130,16 +130,6 @@ do { \
130	#define smp_mb__before_spinlock() smp_wmb()	130	#define smp_mb__before_spinlock() smp_wmb()
131	#endif	131	#endif
132		132
133	/*
134	* Place this after a lock-acquisition primitive to guarantee that
135	* an UNLOCK+LOCK pair act as a full barrier. This guarantee applies
136	* if the UNLOCK and LOCK are executed by the same CPU or if the
137	* UNLOCK and LOCK operate on the same lock variable.
138	*/
139	#ifndef smp_mb__after_unlock_lock
140	#define smp_mb__after_unlock_lock() do { } while (0)
141	#endif
142
143	/**	133	/**
144	* raw_spin_unlock_wait - wait until the spinlock gets unlocked	134	* raw_spin_unlock_wait - wait until the spinlock gets unlocked
145	* @lock: the spinlock in question.	135	* @lock: the spinlock in question.


diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 0412030ca882..2e991f8361e4 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h
@@ -653,3 +653,15 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
653	#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */	653	#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
654	}	654	}
655	#endif /* #ifdef CONFIG_RCU_TRACE */	655	#endif /* #ifdef CONFIG_RCU_TRACE */
		656
		657	/*
		658	* Place this after a lock-acquisition primitive to guarantee that
		659	* an UNLOCK+LOCK pair act as a full barrier. This guarantee applies
		660	* if the UNLOCK and LOCK are executed by the same CPU or if the
		661	* UNLOCK and LOCK operate on the same lock variable.
		662	*/
		663	#ifdef CONFIG_PPC
		664	#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
		665	#else /* #ifdef CONFIG_PPC */
		666	#define smp_mb__after_unlock_lock() do { } while (0)
		667	#endif /* #else #ifdef CONFIG_PPC */