author	Peter Zijlstra <peterz@infradead.org>	2013-11-06 08:57:36 -0500
committer	Ingo Molnar <mingo@kernel.org>	2014-01-12 04:37:17 -0500
commit	47933ad41a86a4a9b50bed7c9b9bd2ba242aac63 (patch)
tree	4f1d94f4b09ecf0c1d99f295f2c31b22eebed492
parent	93ea02bb84354370e51de803a9405f171f3edf88 (diff)
arch: Introduce smp_load_acquire(), smp_store_release()
A number of situations currently require the heavyweight smp_mb(), even though there is no need to order prior stores against later loads. Many architectures have much cheaper ways to handle these situations, but the Linux kernel currently has no portable way to make use of them.

This commit therefore supplies smp_load_acquire() and smp_store_release() to remedy this situation. The new smp_load_acquire() primitive orders the specified load against any subsequent reads or writes, while the new smp_store_release() primitive orders the specified store against any prior reads or writes. These primitives allow array-based circular FIFOs to be implemented without an smp_mb(), and also allow a theoretical hole in rcu_assign_pointer() to be closed at no additional expense on most architectures.

In addition, the RCU experience transitioning from explicit smp_read_barrier_depends() and smp_wmb() to rcu_dereference() and rcu_assign_pointer(), respectively, resulted in substantial improvements in readability. It therefore seems likely that replacing other explicit barriers with smp_load_acquire() and smp_store_release() will provide similar benefits. It appears that roughly half of the explicit barriers in core kernel code might be so replaced.

[Changelog by PaulMck]

Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Victor Kaplansky <VICTORK@il.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: http://lkml.kernel.org/r/20131213150640.908486364@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
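As a concrete illustration of the circular-FIFO case mentioned above, here is a minimal single-producer/single-consumer ring sketch built on the two new primitives. The names (struct ring, ring_put(), ring_get(), RING_SIZE) are illustrative only and are not part of this patch:

#define RING_SIZE 16	/* must be a power of two */

struct ring {
	unsigned int head;		/* written only by the producer */
	unsigned int tail;		/* written only by the consumer */
	void *buf[RING_SIZE];
};

/* Producer: publish one item, or fail if the ring is full. */
static int ring_put(struct ring *r, void *item)
{
	unsigned int head = r->head;
	/* Pairs with the release store of tail in ring_get(): a slot is
	 * only reused after the consumer has finished reading it. */
	unsigned int tail = smp_load_acquire(&r->tail);

	if (head - tail >= RING_SIZE)
		return -1;			/* full */
	r->buf[head & (RING_SIZE - 1)] = item;
	/* Order the buf[] write before making the slot visible. */
	smp_store_release(&r->head, head + 1);
	return 0;
}

/* Consumer: take one item, or fail if the ring is empty. */
static void *ring_get(struct ring *r)
{
	unsigned int tail = r->tail;
	/* Order the head read before the buf[] read below. */
	unsigned int head = smp_load_acquire(&r->head);
	void *item;

	if (tail == head)
		return NULL;			/* empty */
	item = r->buf[tail & (RING_SIZE - 1)];
	/* Order the buf[] read before handing the slot back. */
	smp_store_release(&r->tail, tail + 1);
	return item;
}

Note how neither side needs smp_mb(): the release on one index pairs with the acquire on the same index from the other side.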
-rw-r--r--	arch/arm/include/asm/barrier.h		15
-rw-r--r--	arch/arm64/include/asm/barrier.h	50
-rw-r--r--	arch/ia64/include/asm/barrier.h		23
-rw-r--r--	arch/metag/include/asm/barrier.h	15
-rw-r--r--	arch/mips/include/asm/barrier.h		15
-rw-r--r--	arch/powerpc/include/asm/barrier.h	21
-rw-r--r--	arch/s390/include/asm/barrier.h		15
-rw-r--r--	arch/sparc/include/asm/barrier_64.h	15
-rw-r--r--	arch/x86/include/asm/barrier.h		43
-rw-r--r--	include/asm-generic/barrier.h		15
-rw-r--r--	include/linux/compiler.h		9
11 files changed, 234 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 60f15e274e6d..2f59f7443396 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -59,6 +59,21 @@
 #define smp_wmb()	dmb(ishst)
 #endif
 
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	___p1; \
+})
+
 #define read_barrier_depends()		do { } while(0)
 #define smp_read_barrier_depends()	do { } while(0)
 
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index d4a63338a53c..78e20ba8806b 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -35,10 +35,60 @@
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
+
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	___p1; \
+})
+
 #else
+
 #define smp_mb()	asm volatile("dmb ish" : : : "memory")
 #define smp_rmb()	asm volatile("dmb ishld" : : : "memory")
 #define smp_wmb()	asm volatile("dmb ishst" : : : "memory")
+
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	switch (sizeof(*p)) { \
+	case 4: \
+		asm volatile ("stlr %w1, %0" \
+				: "=Q" (*p) : "r" (v) : "memory"); \
+		break; \
+	case 8: \
+		asm volatile ("stlr %1, %0" \
+				: "=Q" (*p) : "r" (v) : "memory"); \
+		break; \
+	} \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1; \
+	compiletime_assert_atomic_type(*p); \
+	switch (sizeof(*p)) { \
+	case 4: \
+		asm volatile ("ldar %w0, %1" \
+			: "=r" (___p1) : "Q" (*p) : "memory"); \
+		break; \
+	case 8: \
+		asm volatile ("ldar %0, %1" \
+			: "=r" (___p1) : "Q" (*p) : "memory"); \
+		break; \
+	} \
+	___p1; \
+})
+
 #endif
 
 #define read_barrier_depends()		do { } while(0)
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index 60576e06b6fb..d0a69aa35e27 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -45,14 +45,37 @@
 # define smp_rmb()	rmb()
 # define smp_wmb()	wmb()
 # define smp_read_barrier_depends()	read_barrier_depends()
+
 #else
+
 # define smp_mb()	barrier()
 # define smp_rmb()	barrier()
 # define smp_wmb()	barrier()
 # define smp_read_barrier_depends()	do { } while(0)
+
 #endif
 
 /*
+ * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no
+ * need for asm trickery!
+ */
+
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	barrier(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	barrier(); \
+	___p1; \
+})
+
+/*
  * XXX check on this ---I suspect what Linus really wants here is
  * acquire vs release semantics but we can't discuss this stuff with
  * Linus just yet. Grrr...
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index c90bfc6bf648..5d6b4b407dda 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -82,4 +82,19 @@ static inline void fence(void)
 #define smp_read_barrier_depends()	do { } while (0)
 #define set_mb(var, value) do { var = value; smp_mb(); } while (0)
 
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	___p1; \
+})
+
 #endif /* _ASM_METAG_BARRIER_H */
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index f26d8e1bf3c3..e1aa4e4c2984 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -180,4 +180,19 @@
 #define nudge_writes() mb()
 #endif
 
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	___p1; \
+})
+
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index ae782254e731..f89da808ce31 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -45,11 +45,15 @@
 # define SMPWMB	eieio
 #endif
 
+#define __lwsync()	__asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+
 #define smp_mb()	mb()
-#define smp_rmb()	__asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define smp_rmb()	__lwsync()
 #define smp_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
 #define smp_read_barrier_depends()	read_barrier_depends()
 #else
+#define __lwsync()	barrier()
+
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
@@ -65,4 +69,19 @@
 #define data_barrier(x) \
	asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
 
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	__lwsync(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	__lwsync(); \
+	___p1; \
+})
+
 #endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 16760eeb79b0..578680f6207a 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -32,4 +32,19 @@
 
 #define set_mb(var, value)	do { var = value; mb(); } while (0)
 
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	barrier(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	barrier(); \
+	___p1; \
+})
+
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 95d45986f908..b5aad964558e 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -53,4 +53,19 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
 
 #define smp_read_barrier_depends()	do { } while(0)
 
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	barrier(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	barrier(); \
+	___p1; \
+})
+
 #endif /* !(__SPARC64_BARRIER_H) */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index c6cd358a1eec..04a48903b2eb 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -92,12 +92,53 @@
 #endif
 #define smp_read_barrier_depends()	read_barrier_depends()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#else
+#else /* !SMP */
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
 #define smp_read_barrier_depends()	do { } while (0)
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif /* SMP */
+
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+/*
+ * For either of these options x86 doesn't have a strong TSO memory
+ * model and we should fall back to full barriers.
+ */
+
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	___p1; \
+})
+
+#else /* regular x86 TSO memory ordering */
+
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	barrier(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	barrier(); \
+	___p1; \
+})
+
 #endif
 
 /*
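To see why the TSO branch above gets away with a pure compiler barrier, consider a hypothetical message-passing pair (the data/ready variables are illustrative, not from the patch). x86 TSO already forbids store/store and load/load reordering, so only the compiler needs restraining:

static int data;
static int ready;

static void writer(void)
{
	data = 42;
	/* TSO keeps the two stores in order; barrier() merely stops the
	 * compiler from reordering them. */
	smp_store_release(&ready, 1);
}

static int reader(void)
{
	/* Likewise, TSO keeps the two loads in order. */
	if (smp_load_acquire(&ready))
		return data;	/* guaranteed to observe 42 */
	return -1;		/* not published yet */
}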
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index d12a90f93689..6f692f8ac664 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -62,5 +62,20 @@
 #define set_mb(var, value)  do { (var) = (value); mb(); } while (0)
 #endif
 
+#define smp_store_release(p, v) \
+do { \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+	compiletime_assert_atomic_type(*p); \
+	smp_mb(); \
+	___p1; \
+})
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_GENERIC_BARRIER_H */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 92669cd182a6..fe7a686dfd8d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -298,6 +298,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 #endif
 
+/* Is this type a native word size -- useful for atomic operations */
+#ifndef __native_word
+# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+#endif
+
 /* Compile time object size, -1 for unknown */
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
@@ -337,6 +342,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #define compiletime_assert(condition, msg) \
	_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
 
+#define compiletime_assert_atomic_type(t) \
+	compiletime_assert(__native_word(t), \
+		"Need native word sized stores/loads for atomicity.")
+
 /*
  * Prevent the compiler from merging or refetching accesses. The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
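A quick sketch of what the new assertion enforces (hypothetical code, not part of this patch): word-sized scalars pass, while larger aggregates fail at build time with the message above.

struct pair { long a, b; };	/* twice the native word size */

void example(int *flag, struct pair *p, struct pair v)
{
	smp_store_release(flag, 1);	/* OK: sizeof(int) is a native word */

	/*
	 * This would fail to compile with "Need native word sized
	 * stores/loads for atomicity.", since sizeof(*p) is neither
	 * sizeof(int) nor sizeof(long):
	 *
	 * smp_store_release(p, v);
	 */
	(void)p;
	(void)v;
}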