author    Palmer Dabbelt <palmer@sifive.com>    2018-04-02 23:36:33 -0400
committer Palmer Dabbelt <palmer@sifive.com>    2018-04-02 23:36:33 -0400
commit    2c9046b71bb6ce2386e8847fce92b18aca9127c4 (patch)
tree      332fc7da3e0d432b5de5113030007f3ef8ae3318
parent    1cead2d7fec53a8d385934caefcf5dbb1b7d778b (diff)
parent    5ce6c1f3535fa8d2134468547377b7b737042834 (diff)
RISC-V: Assorted memory model fixes
These fixes fall into three categories:

* The definition of __smp_{store_release,load_acquire}, which lets us avoid
  emitting a full fence where one is unnecessary.

* Fixes to avoid relying on the behavior of "*.aqrl" atomics, as those are
  specified in the currently released RISC-V memory model draft in a way that
  makes them useless for Linux.  This might change in the future, but for now
  the code matches the memory model spec as written, so at least we're getting
  closer to something sane.  The actual fix is to delete the RISC-V-specific
  atomics and fall back to generic versions that use the new fences from above.

* Cleanups to our atomic macros, which are mostly non-functional changes.

Unfortunately I haven't given these as thorough a testing as I probably should
have, but I've poked through the code and they seem generally OK.
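
For readers unfamiliar with the fence-based mapping this series adopts, here is a
minimal, illustrative C sketch of the idea: a release operation places a
"fence rw, w" before a plain access, and an acquire operation places a
"fence r, rw" after it.  The demo_* names below are hypothetical and exist only
for this sketch; the real definitions are the __smp_store_release /
__smp_load_acquire and RISCV_{ACQUIRE,RELEASE}_BARRIER hunks in the diff that
follows.

/* Illustrative sketch only -- not the kernel's implementation. */
#define demo_release_barrier()	__asm__ __volatile__ ("fence rw, w" ::: "memory")
#define demo_acquire_barrier()	__asm__ __volatile__ ("fence r, rw" ::: "memory")

/* store-release: all earlier memory accesses are ordered before the store */
static inline void demo_store_release(int *p, int v)
{
	demo_release_barrier();
	*(volatile int *)p = v;
}

/* load-acquire: the load is ordered before all later memory accesses */
static inline int demo_load_acquire(int *p)
{
	int v = *(volatile int *)p;
	demo_acquire_barrier();
	return v;
}
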
-rw-r--r--  arch/riscv/include/asm/atomic.h   | 417
-rw-r--r--  arch/riscv/include/asm/barrier.h  |  15
-rw-r--r--  arch/riscv/include/asm/cmpxchg.h  | 391
-rw-r--r--  arch/riscv/include/asm/fence.h    |  12
-rw-r--r--  arch/riscv/include/asm/spinlock.h |  29
5 files changed, 630 insertions(+), 234 deletions(-)
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index e65d1cd89e28..855115ace98c 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -24,6 +24,20 @@
24#include <asm/barrier.h> 24#include <asm/barrier.h>
25 25
26#define ATOMIC_INIT(i) { (i) } 26#define ATOMIC_INIT(i) { (i) }
27
28#define __atomic_op_acquire(op, args...) \
29({ \
30 typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \
31 __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory"); \
32 __ret; \
33})
34
35#define __atomic_op_release(op, args...) \
36({ \
37 __asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory"); \
38 op##_relaxed(args); \
39})
40
27static __always_inline int atomic_read(const atomic_t *v) 41static __always_inline int atomic_read(const atomic_t *v)
28{ 42{
29 return READ_ONCE(v->counter); 43 return READ_ONCE(v->counter);
@@ -50,22 +64,23 @@ static __always_inline void atomic64_set(atomic64_t *v, long i)
50 * have the AQ or RL bits set. These don't return anything, so there's only 64 * have the AQ or RL bits set. These don't return anything, so there's only
51 * one version to worry about. 65 * one version to worry about.
52 */ 66 */
53#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \ 67#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
54static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ 68static __always_inline \
55{ \ 69void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
56 __asm__ __volatile__ ( \ 70{ \
57 "amo" #asm_op "." #asm_type " zero, %1, %0" \ 71 __asm__ __volatile__ ( \
58 : "+A" (v->counter) \ 72 " amo" #asm_op "." #asm_type " zero, %1, %0" \
59 : "r" (I) \ 73 : "+A" (v->counter) \
60 : "memory"); \ 74 : "r" (I) \
61} 75 : "memory"); \
76} \
62 77
63#ifdef CONFIG_GENERIC_ATOMIC64 78#ifdef CONFIG_GENERIC_ATOMIC64
64#define ATOMIC_OPS(op, asm_op, I) \ 79#define ATOMIC_OPS(op, asm_op, I) \
65 ATOMIC_OP (op, asm_op, I, w, int, ) 80 ATOMIC_OP (op, asm_op, I, w, int, )
66#else 81#else
67#define ATOMIC_OPS(op, asm_op, I) \ 82#define ATOMIC_OPS(op, asm_op, I) \
68 ATOMIC_OP (op, asm_op, I, w, int, ) \ 83 ATOMIC_OP (op, asm_op, I, w, int, ) \
69 ATOMIC_OP (op, asm_op, I, d, long, 64) 84 ATOMIC_OP (op, asm_op, I, d, long, 64)
70#endif 85#endif
71 86
@@ -79,75 +94,115 @@ ATOMIC_OPS(xor, xor, i)
79#undef ATOMIC_OPS 94#undef ATOMIC_OPS
80 95
81/* 96/*
82 * Atomic ops that have ordered, relaxed, acquire, and relese variants. 97 * Atomic ops that have ordered, relaxed, acquire, and release variants.
83 * There's two flavors of these: the arithmatic ops have both fetch and return 98 * There's two flavors of these: the arithmatic ops have both fetch and return
84 * versions, while the logical ops only have fetch versions. 99 * versions, while the logical ops only have fetch versions.
85 */ 100 */
86#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \ 101#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \
87static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \ 102static __always_inline \
88{ \ 103c_type atomic##prefix##_fetch_##op##_relaxed(c_type i, \
89 register c_type ret; \ 104 atomic##prefix##_t *v) \
90 __asm__ __volatile__ ( \ 105{ \
91 "amo" #asm_op "." #asm_type #asm_or " %1, %2, %0" \ 106 register c_type ret; \
92 : "+A" (v->counter), "=r" (ret) \ 107 __asm__ __volatile__ ( \
93 : "r" (I) \ 108 " amo" #asm_op "." #asm_type " %1, %2, %0" \
94 : "memory"); \ 109 : "+A" (v->counter), "=r" (ret) \
95 return ret; \ 110 : "r" (I) \
111 : "memory"); \
112 return ret; \
113} \
114static __always_inline \
115c_type atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
116{ \
117 register c_type ret; \
118 __asm__ __volatile__ ( \
119 " amo" #asm_op "." #asm_type ".aqrl %1, %2, %0" \
120 : "+A" (v->counter), "=r" (ret) \
121 : "r" (I) \
122 : "memory"); \
123 return ret; \
96} 124}
97 125
98#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \ 126#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix) \
99static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, atomic##prefix##_t *v) \ 127static __always_inline \
100{ \ 128c_type atomic##prefix##_##op##_return_relaxed(c_type i, \
101 return atomic##prefix##_fetch_##op##c_or(i, v) c_op I; \ 129 atomic##prefix##_t *v) \
130{ \
131 return atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \
132} \
133static __always_inline \
134c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
135{ \
136 return atomic##prefix##_fetch_##op(i, v) c_op I; \
102} 137}
103 138
104#ifdef CONFIG_GENERIC_ATOMIC64 139#ifdef CONFIG_GENERIC_ATOMIC64
105#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ 140#define ATOMIC_OPS(op, asm_op, c_op, I) \
106 ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ 141 ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
107 ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) 142 ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, )
108#else 143#else
109#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ 144#define ATOMIC_OPS(op, asm_op, c_op, I) \
110 ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ 145 ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
111 ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ 146 ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \
112 ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \ 147 ATOMIC_FETCH_OP( op, asm_op, I, d, long, 64) \
113 ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) 148 ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64)
114#endif 149#endif
115 150
116ATOMIC_OPS(add, add, +, i, , _relaxed) 151ATOMIC_OPS(add, add, +, i)
117ATOMIC_OPS(add, add, +, i, .aq , _acquire) 152ATOMIC_OPS(sub, add, +, -i)
118ATOMIC_OPS(add, add, +, i, .rl , _release) 153
119ATOMIC_OPS(add, add, +, i, .aqrl, ) 154#define atomic_add_return_relaxed atomic_add_return_relaxed
155#define atomic_sub_return_relaxed atomic_sub_return_relaxed
156#define atomic_add_return atomic_add_return
157#define atomic_sub_return atomic_sub_return
120 158
121ATOMIC_OPS(sub, add, +, -i, , _relaxed) 159#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
122ATOMIC_OPS(sub, add, +, -i, .aq , _acquire) 160#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
123ATOMIC_OPS(sub, add, +, -i, .rl , _release) 161#define atomic_fetch_add atomic_fetch_add
124ATOMIC_OPS(sub, add, +, -i, .aqrl, ) 162#define atomic_fetch_sub atomic_fetch_sub
163
164#ifndef CONFIG_GENERIC_ATOMIC64
165#define atomic64_add_return_relaxed atomic64_add_return_relaxed
166#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
167#define atomic64_add_return atomic64_add_return
168#define atomic64_sub_return atomic64_sub_return
169
170#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
171#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
172#define atomic64_fetch_add atomic64_fetch_add
173#define atomic64_fetch_sub atomic64_fetch_sub
174#endif
125 175
126#undef ATOMIC_OPS 176#undef ATOMIC_OPS
127 177
128#ifdef CONFIG_GENERIC_ATOMIC64 178#ifdef CONFIG_GENERIC_ATOMIC64
129#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ 179#define ATOMIC_OPS(op, asm_op, I) \
130 ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) 180 ATOMIC_FETCH_OP(op, asm_op, I, w, int, )
131#else 181#else
132#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ 182#define ATOMIC_OPS(op, asm_op, I) \
133 ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \ 183 ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \
134 ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64) 184 ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64)
135#endif 185#endif
136 186
137ATOMIC_OPS(and, and, i, , _relaxed) 187ATOMIC_OPS(and, and, i)
138ATOMIC_OPS(and, and, i, .aq , _acquire) 188ATOMIC_OPS( or, or, i)
139ATOMIC_OPS(and, and, i, .rl , _release) 189ATOMIC_OPS(xor, xor, i)
140ATOMIC_OPS(and, and, i, .aqrl, )
141 190
142ATOMIC_OPS( or, or, i, , _relaxed) 191#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
143ATOMIC_OPS( or, or, i, .aq , _acquire) 192#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
144ATOMIC_OPS( or, or, i, .rl , _release) 193#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
145ATOMIC_OPS( or, or, i, .aqrl, ) 194#define atomic_fetch_and atomic_fetch_and
195#define atomic_fetch_or atomic_fetch_or
196#define atomic_fetch_xor atomic_fetch_xor
146 197
147ATOMIC_OPS(xor, xor, i, , _relaxed) 198#ifndef CONFIG_GENERIC_ATOMIC64
148ATOMIC_OPS(xor, xor, i, .aq , _acquire) 199#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
149ATOMIC_OPS(xor, xor, i, .rl , _release) 200#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
150ATOMIC_OPS(xor, xor, i, .aqrl, ) 201#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
202#define atomic64_fetch_and atomic64_fetch_and
203#define atomic64_fetch_or atomic64_fetch_or
204#define atomic64_fetch_xor atomic64_fetch_xor
205#endif
151 206
152#undef ATOMIC_OPS 207#undef ATOMIC_OPS
153 208
@@ -157,22 +212,24 @@ ATOMIC_OPS(xor, xor, i, .aqrl, )
157/* 212/*
158 * The extra atomic operations that are constructed from one of the core 213 * The extra atomic operations that are constructed from one of the core
159 * AMO-based operations above (aside from sub, which is easier to fit above). 214 * AMO-based operations above (aside from sub, which is easier to fit above).
160 * These are required to perform a barrier, but they're OK this way because 215 * These are required to perform a full barrier, but they're OK this way
161 * atomic_*_return is also required to perform a barrier. 216 * because atomic_*_return is also required to perform a full barrier.
217 *
162 */ 218 */
163#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix) \ 219#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix) \
164static __always_inline bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ 220static __always_inline \
165{ \ 221bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
166 return atomic##prefix##_##func_op##_return(i, v) comp_op I; \ 222{ \
223 return atomic##prefix##_##func_op##_return(i, v) comp_op I; \
167} 224}
168 225
169#ifdef CONFIG_GENERIC_ATOMIC64 226#ifdef CONFIG_GENERIC_ATOMIC64
170#define ATOMIC_OPS(op, func_op, comp_op, I) \ 227#define ATOMIC_OPS(op, func_op, comp_op, I) \
171 ATOMIC_OP (op, func_op, comp_op, I, int, ) 228 ATOMIC_OP(op, func_op, comp_op, I, int, )
172#else 229#else
173#define ATOMIC_OPS(op, func_op, comp_op, I) \ 230#define ATOMIC_OPS(op, func_op, comp_op, I) \
174 ATOMIC_OP (op, func_op, comp_op, I, int, ) \ 231 ATOMIC_OP(op, func_op, comp_op, I, int, ) \
175 ATOMIC_OP (op, func_op, comp_op, I, long, 64) 232 ATOMIC_OP(op, func_op, comp_op, I, long, 64)
176#endif 233#endif
177 234
178ATOMIC_OPS(add_and_test, add, ==, 0) 235ATOMIC_OPS(add_and_test, add, ==, 0)
@@ -182,51 +239,87 @@ ATOMIC_OPS(add_negative, add, <, 0)
182#undef ATOMIC_OP 239#undef ATOMIC_OP
183#undef ATOMIC_OPS 240#undef ATOMIC_OPS
184 241
185#define ATOMIC_OP(op, func_op, I, c_type, prefix) \ 242#define ATOMIC_OP(op, func_op, I, c_type, prefix) \
186static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \ 243static __always_inline \
187{ \ 244void atomic##prefix##_##op(atomic##prefix##_t *v) \
188 atomic##prefix##_##func_op(I, v); \ 245{ \
246 atomic##prefix##_##func_op(I, v); \
189} 247}
190 248
191#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \ 249#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \
192static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \ 250static __always_inline \
193{ \ 251c_type atomic##prefix##_fetch_##op##_relaxed(atomic##prefix##_t *v) \
194 return atomic##prefix##_fetch_##func_op(I, v); \ 252{ \
253 return atomic##prefix##_fetch_##func_op##_relaxed(I, v); \
254} \
255static __always_inline \
256c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \
257{ \
258 return atomic##prefix##_fetch_##func_op(I, v); \
195} 259}
196 260
197#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix) \ 261#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix) \
198static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v) \ 262static __always_inline \
199{ \ 263c_type atomic##prefix##_##op##_return_relaxed(atomic##prefix##_t *v) \
200 return atomic##prefix##_fetch_##op(v) c_op I; \ 264{ \
265 return atomic##prefix##_fetch_##op##_relaxed(v) c_op I; \
266} \
267static __always_inline \
268c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v) \
269{ \
270 return atomic##prefix##_fetch_##op(v) c_op I; \
201} 271}
202 272
203#ifdef CONFIG_GENERIC_ATOMIC64 273#ifdef CONFIG_GENERIC_ATOMIC64
204#define ATOMIC_OPS(op, asm_op, c_op, I) \ 274#define ATOMIC_OPS(op, asm_op, c_op, I) \
205 ATOMIC_OP (op, asm_op, I, int, ) \ 275 ATOMIC_OP( op, asm_op, I, int, ) \
206 ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ 276 ATOMIC_FETCH_OP( op, asm_op, I, int, ) \
207 ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) 277 ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, )
208#else 278#else
209#define ATOMIC_OPS(op, asm_op, c_op, I) \ 279#define ATOMIC_OPS(op, asm_op, c_op, I) \
210 ATOMIC_OP (op, asm_op, I, int, ) \ 280 ATOMIC_OP( op, asm_op, I, int, ) \
211 ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ 281 ATOMIC_FETCH_OP( op, asm_op, I, int, ) \
212 ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \ 282 ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \
213 ATOMIC_OP (op, asm_op, I, long, 64) \ 283 ATOMIC_OP( op, asm_op, I, long, 64) \
214 ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \ 284 ATOMIC_FETCH_OP( op, asm_op, I, long, 64) \
215 ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64) 285 ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
216#endif 286#endif
217 287
218ATOMIC_OPS(inc, add, +, 1) 288ATOMIC_OPS(inc, add, +, 1)
219ATOMIC_OPS(dec, add, +, -1) 289ATOMIC_OPS(dec, add, +, -1)
220 290
291#define atomic_inc_return_relaxed atomic_inc_return_relaxed
292#define atomic_dec_return_relaxed atomic_dec_return_relaxed
293#define atomic_inc_return atomic_inc_return
294#define atomic_dec_return atomic_dec_return
295
296#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed
297#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
298#define atomic_fetch_inc atomic_fetch_inc
299#define atomic_fetch_dec atomic_fetch_dec
300
301#ifndef CONFIG_GENERIC_ATOMIC64
302#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
303#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
304#define atomic64_inc_return atomic64_inc_return
305#define atomic64_dec_return atomic64_dec_return
306
307#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed
308#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
309#define atomic64_fetch_inc atomic64_fetch_inc
310#define atomic64_fetch_dec atomic64_fetch_dec
311#endif
312
221#undef ATOMIC_OPS 313#undef ATOMIC_OPS
222#undef ATOMIC_OP 314#undef ATOMIC_OP
223#undef ATOMIC_FETCH_OP 315#undef ATOMIC_FETCH_OP
224#undef ATOMIC_OP_RETURN 316#undef ATOMIC_OP_RETURN
225 317
226#define ATOMIC_OP(op, func_op, comp_op, I, prefix) \ 318#define ATOMIC_OP(op, func_op, comp_op, I, prefix) \
227static __always_inline bool atomic##prefix##_##op(atomic##prefix##_t *v) \ 319static __always_inline \
228{ \ 320bool atomic##prefix##_##op(atomic##prefix##_t *v) \
229 return atomic##prefix##_##func_op##_return(v) comp_op I; \ 321{ \
322 return atomic##prefix##_##func_op##_return(v) comp_op I; \
230} 323}
231 324
232ATOMIC_OP(inc_and_test, inc, ==, 0, ) 325ATOMIC_OP(inc_and_test, inc, ==, 0, )
@@ -238,19 +331,19 @@ ATOMIC_OP(dec_and_test, dec, ==, 0, 64)
238 331
239#undef ATOMIC_OP 332#undef ATOMIC_OP
240 333
241/* This is required to provide a barrier on success. */ 334/* This is required to provide a full barrier on success. */
242static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) 335static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
243{ 336{
244 int prev, rc; 337 int prev, rc;
245 338
246 __asm__ __volatile__ ( 339 __asm__ __volatile__ (
247 "0:\n\t" 340 "0: lr.w %[p], %[c]\n"
248 "lr.w.aqrl %[p], %[c]\n\t" 341 " beq %[p], %[u], 1f\n"
249 "beq %[p], %[u], 1f\n\t" 342 " add %[rc], %[p], %[a]\n"
250 "add %[rc], %[p], %[a]\n\t" 343 " sc.w.rl %[rc], %[rc], %[c]\n"
251 "sc.w.aqrl %[rc], %[rc], %[c]\n\t" 344 " bnez %[rc], 0b\n"
252 "bnez %[rc], 0b\n\t" 345 " fence rw, rw\n"
253 "1:" 346 "1:\n"
254 : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) 347 : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
255 : [a]"r" (a), [u]"r" (u) 348 : [a]"r" (a), [u]"r" (u)
256 : "memory"); 349 : "memory");
@@ -263,13 +356,13 @@ static __always_inline long __atomic64_add_unless(atomic64_t *v, long a, long u)
263 long prev, rc; 356 long prev, rc;
264 357
265 __asm__ __volatile__ ( 358 __asm__ __volatile__ (
266 "0:\n\t" 359 "0: lr.d %[p], %[c]\n"
267 "lr.d.aqrl %[p], %[c]\n\t" 360 " beq %[p], %[u], 1f\n"
268 "beq %[p], %[u], 1f\n\t" 361 " add %[rc], %[p], %[a]\n"
269 "add %[rc], %[p], %[a]\n\t" 362 " sc.d.rl %[rc], %[rc], %[c]\n"
270 "sc.d.aqrl %[rc], %[rc], %[c]\n\t" 363 " bnez %[rc], 0b\n"
271 "bnez %[rc], 0b\n\t" 364 " fence rw, rw\n"
272 "1:" 365 "1:\n"
273 : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) 366 : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
274 : [a]"r" (a), [u]"r" (u) 367 : [a]"r" (a), [u]"r" (u)
275 : "memory"); 368 : "memory");
@@ -300,37 +393,63 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
300 393
301/* 394/*
302 * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as 395 * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
303 * {cmp,}xchg and the operations that return, so they need a barrier. 396 * {cmp,}xchg and the operations that return, so they need a full barrier.
304 */
305/*
306 * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by
307 * assigning the same barrier to both the LR and SC operations, but that might
308 * not make any sense. We're waiting on a memory model specification to
309 * determine exactly what the right thing to do is here.
310 */ 397 */
311#define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \ 398#define ATOMIC_OP(c_t, prefix, size) \
312static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \ 399static __always_inline \
313{ \ 400c_t atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n) \
314 return __cmpxchg(&(v->counter), o, n, size, asm_or, asm_or); \ 401{ \
315} \ 402 return __xchg_relaxed(&(v->counter), n, size); \
316static __always_inline c_t atomic##prefix##_xchg##c_or(atomic##prefix##_t *v, c_t n) \ 403} \
317{ \ 404static __always_inline \
318 return __xchg(n, &(v->counter), size, asm_or); \ 405c_t atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n) \
406{ \
407 return __xchg_acquire(&(v->counter), n, size); \
408} \
409static __always_inline \
410c_t atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \
411{ \
412 return __xchg_release(&(v->counter), n, size); \
413} \
414static __always_inline \
415c_t atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \
416{ \
417 return __xchg(&(v->counter), n, size); \
418} \
419static __always_inline \
420c_t atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \
421 c_t o, c_t n) \
422{ \
423 return __cmpxchg_relaxed(&(v->counter), o, n, size); \
424} \
425static __always_inline \
426c_t atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v, \
427 c_t o, c_t n) \
428{ \
429 return __cmpxchg_acquire(&(v->counter), o, n, size); \
430} \
431static __always_inline \
432c_t atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v, \
433 c_t o, c_t n) \
434{ \
435 return __cmpxchg_release(&(v->counter), o, n, size); \
436} \
437static __always_inline \
438c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
439{ \
440 return __cmpxchg(&(v->counter), o, n, size); \
319} 441}
320 442
321#ifdef CONFIG_GENERIC_ATOMIC64 443#ifdef CONFIG_GENERIC_ATOMIC64
322#define ATOMIC_OPS(c_or, asm_or) \ 444#define ATOMIC_OPS() \
323 ATOMIC_OP( int, , c_or, 4, asm_or) 445 ATOMIC_OP( int, , 4)
324#else 446#else
325#define ATOMIC_OPS(c_or, asm_or) \ 447#define ATOMIC_OPS() \
326 ATOMIC_OP( int, , c_or, 4, asm_or) \ 448 ATOMIC_OP( int, , 4) \
327 ATOMIC_OP(long, 64, c_or, 8, asm_or) 449 ATOMIC_OP(long, 64, 8)
328#endif 450#endif
329 451
330ATOMIC_OPS( , .aqrl) 452ATOMIC_OPS()
331ATOMIC_OPS(_acquire, .aq)
332ATOMIC_OPS(_release, .rl)
333ATOMIC_OPS(_relaxed, )
334 453
335#undef ATOMIC_OPS 454#undef ATOMIC_OPS
336#undef ATOMIC_OP 455#undef ATOMIC_OP
@@ -340,13 +459,13 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset)
340 int prev, rc; 459 int prev, rc;
341 460
342 __asm__ __volatile__ ( 461 __asm__ __volatile__ (
343 "0:\n\t" 462 "0: lr.w %[p], %[c]\n"
344 "lr.w.aqrl %[p], %[c]\n\t" 463 " sub %[rc], %[p], %[o]\n"
345 "sub %[rc], %[p], %[o]\n\t" 464 " bltz %[rc], 1f\n"
346 "bltz %[rc], 1f\n\t" 465 " sc.w.rl %[rc], %[rc], %[c]\n"
347 "sc.w.aqrl %[rc], %[rc], %[c]\n\t" 466 " bnez %[rc], 0b\n"
348 "bnez %[rc], 0b\n\t" 467 " fence rw, rw\n"
349 "1:" 468 "1:\n"
350 : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) 469 : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
351 : [o]"r" (offset) 470 : [o]"r" (offset)
352 : "memory"); 471 : "memory");
@@ -361,13 +480,13 @@ static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset)
361 long prev, rc; 480 long prev, rc;
362 481
363 __asm__ __volatile__ ( 482 __asm__ __volatile__ (
364 "0:\n\t" 483 "0: lr.d %[p], %[c]\n"
365 "lr.d.aqrl %[p], %[c]\n\t" 484 " sub %[rc], %[p], %[o]\n"
366 "sub %[rc], %[p], %[o]\n\t" 485 " bltz %[rc], 1f\n"
367 "bltz %[rc], 1f\n\t" 486 " sc.d.rl %[rc], %[rc], %[c]\n"
368 "sc.d.aqrl %[rc], %[rc], %[c]\n\t" 487 " bnez %[rc], 0b\n"
369 "bnez %[rc], 0b\n\t" 488 " fence rw, rw\n"
370 "1:" 489 "1:\n"
371 : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) 490 : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
372 : [o]"r" (offset) 491 : [o]"r" (offset)
373 : "memory"); 492 : "memory");
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 5510366d169a..d4628e4b3a5e 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -38,6 +38,21 @@
38#define __smp_rmb() RISCV_FENCE(r,r) 38#define __smp_rmb() RISCV_FENCE(r,r)
39#define __smp_wmb() RISCV_FENCE(w,w) 39#define __smp_wmb() RISCV_FENCE(w,w)
40 40
41#define __smp_store_release(p, v) \
42do { \
43 compiletime_assert_atomic_type(*p); \
44 RISCV_FENCE(rw,w); \
45 WRITE_ONCE(*p, v); \
46} while (0)
47
48#define __smp_load_acquire(p) \
49({ \
50 typeof(*p) ___p1 = READ_ONCE(*p); \
51 compiletime_assert_atomic_type(*p); \
52 RISCV_FENCE(r,rw); \
53 ___p1; \
54})
55
41/* 56/*
42 * This is a very specific barrier: it's currently only used in two places in 57 * This is a very specific barrier: it's currently only used in two places in
43 * the kernel, both in the scheduler. See include/linux/spinlock.h for the two 58 * the kernel, both in the scheduler. See include/linux/spinlock.h for the two
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index db249dbc7b97..c12833f7b6bd 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -17,45 +17,153 @@
17#include <linux/bug.h> 17#include <linux/bug.h>
18 18
19#include <asm/barrier.h> 19#include <asm/barrier.h>
20#include <asm/fence.h>
20 21
21#define __xchg(new, ptr, size, asm_or) \ 22#define __xchg_relaxed(ptr, new, size) \
22({ \ 23({ \
23 __typeof__(ptr) __ptr = (ptr); \ 24 __typeof__(ptr) __ptr = (ptr); \
24 __typeof__(new) __new = (new); \ 25 __typeof__(new) __new = (new); \
25 __typeof__(*(ptr)) __ret; \ 26 __typeof__(*(ptr)) __ret; \
26 switch (size) { \ 27 switch (size) { \
27 case 4: \ 28 case 4: \
28 __asm__ __volatile__ ( \ 29 __asm__ __volatile__ ( \
29 "amoswap.w" #asm_or " %0, %2, %1" \ 30 " amoswap.w %0, %2, %1\n" \
30 : "=r" (__ret), "+A" (*__ptr) \ 31 : "=r" (__ret), "+A" (*__ptr) \
31 : "r" (__new) \ 32 : "r" (__new) \
32 : "memory"); \ 33 : "memory"); \
33 break; \ 34 break; \
34 case 8: \ 35 case 8: \
35 __asm__ __volatile__ ( \ 36 __asm__ __volatile__ ( \
36 "amoswap.d" #asm_or " %0, %2, %1" \ 37 " amoswap.d %0, %2, %1\n" \
37 : "=r" (__ret), "+A" (*__ptr) \ 38 : "=r" (__ret), "+A" (*__ptr) \
38 : "r" (__new) \ 39 : "r" (__new) \
39 : "memory"); \ 40 : "memory"); \
40 break; \ 41 break; \
41 default: \ 42 default: \
42 BUILD_BUG(); \ 43 BUILD_BUG(); \
43 } \ 44 } \
44 __ret; \ 45 __ret; \
45}) 46})
46 47
47#define xchg(ptr, x) (__xchg((x), (ptr), sizeof(*(ptr)), .aqrl)) 48#define xchg_relaxed(ptr, x) \
48 49({ \
49#define xchg32(ptr, x) \ 50 __typeof__(*(ptr)) _x_ = (x); \
50({ \ 51 (__typeof__(*(ptr))) __xchg_relaxed((ptr), \
51 BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ 52 _x_, sizeof(*(ptr))); \
52 xchg((ptr), (x)); \ 53})
53}) 54
54 55#define __xchg_acquire(ptr, new, size) \
55#define xchg64(ptr, x) \ 56({ \
56({ \ 57 __typeof__(ptr) __ptr = (ptr); \
57 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ 58 __typeof__(new) __new = (new); \
58 xchg((ptr), (x)); \ 59 __typeof__(*(ptr)) __ret; \
60 switch (size) { \
61 case 4: \
62 __asm__ __volatile__ ( \
63 " amoswap.w %0, %2, %1\n" \
64 RISCV_ACQUIRE_BARRIER \
65 : "=r" (__ret), "+A" (*__ptr) \
66 : "r" (__new) \
67 : "memory"); \
68 break; \
69 case 8: \
70 __asm__ __volatile__ ( \
71 " amoswap.d %0, %2, %1\n" \
72 RISCV_ACQUIRE_BARRIER \
73 : "=r" (__ret), "+A" (*__ptr) \
74 : "r" (__new) \
75 : "memory"); \
76 break; \
77 default: \
78 BUILD_BUG(); \
79 } \
80 __ret; \
81})
82
83#define xchg_acquire(ptr, x) \
84({ \
85 __typeof__(*(ptr)) _x_ = (x); \
86 (__typeof__(*(ptr))) __xchg_acquire((ptr), \
87 _x_, sizeof(*(ptr))); \
88})
89
90#define __xchg_release(ptr, new, size) \
91({ \
92 __typeof__(ptr) __ptr = (ptr); \
93 __typeof__(new) __new = (new); \
94 __typeof__(*(ptr)) __ret; \
95 switch (size) { \
96 case 4: \
97 __asm__ __volatile__ ( \
98 RISCV_RELEASE_BARRIER \
99 " amoswap.w %0, %2, %1\n" \
100 : "=r" (__ret), "+A" (*__ptr) \
101 : "r" (__new) \
102 : "memory"); \
103 break; \
104 case 8: \
105 __asm__ __volatile__ ( \
106 RISCV_RELEASE_BARRIER \
107 " amoswap.d %0, %2, %1\n" \
108 : "=r" (__ret), "+A" (*__ptr) \
109 : "r" (__new) \
110 : "memory"); \
111 break; \
112 default: \
113 BUILD_BUG(); \
114 } \
115 __ret; \
116})
117
118#define xchg_release(ptr, x) \
119({ \
120 __typeof__(*(ptr)) _x_ = (x); \
121 (__typeof__(*(ptr))) __xchg_release((ptr), \
122 _x_, sizeof(*(ptr))); \
123})
124
125#define __xchg(ptr, new, size) \
126({ \
127 __typeof__(ptr) __ptr = (ptr); \
128 __typeof__(new) __new = (new); \
129 __typeof__(*(ptr)) __ret; \
130 switch (size) { \
131 case 4: \
132 __asm__ __volatile__ ( \
133 " amoswap.w.aqrl %0, %2, %1\n" \
134 : "=r" (__ret), "+A" (*__ptr) \
135 : "r" (__new) \
136 : "memory"); \
137 break; \
138 case 8: \
139 __asm__ __volatile__ ( \
140 " amoswap.d.aqrl %0, %2, %1\n" \
141 : "=r" (__ret), "+A" (*__ptr) \
142 : "r" (__new) \
143 : "memory"); \
144 break; \
145 default: \
146 BUILD_BUG(); \
147 } \
148 __ret; \
149})
150
151#define xchg(ptr, x) \
152({ \
153 __typeof__(*(ptr)) _x_ = (x); \
154 (__typeof__(*(ptr))) __xchg((ptr), _x_, sizeof(*(ptr))); \
155})
156
157#define xchg32(ptr, x) \
158({ \
159 BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
160 xchg((ptr), (x)); \
161})
162
163#define xchg64(ptr, x) \
164({ \
165 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
166 xchg((ptr), (x)); \
59}) 167})
60 168
61/* 169/*
@@ -63,7 +171,51 @@
63 * store NEW in MEM. Return the initial value in MEM. Success is 171 * store NEW in MEM. Return the initial value in MEM. Success is
64 * indicated by comparing RETURN with OLD. 172 * indicated by comparing RETURN with OLD.
65 */ 173 */
66#define __cmpxchg(ptr, old, new, size, lrb, scb) \ 174#define __cmpxchg_relaxed(ptr, old, new, size) \
175({ \
176 __typeof__(ptr) __ptr = (ptr); \
177 __typeof__(*(ptr)) __old = (old); \
178 __typeof__(*(ptr)) __new = (new); \
179 __typeof__(*(ptr)) __ret; \
180 register unsigned int __rc; \
181 switch (size) { \
182 case 4: \
183 __asm__ __volatile__ ( \
184 "0: lr.w %0, %2\n" \
185 " bne %0, %z3, 1f\n" \
186 " sc.w %1, %z4, %2\n" \
187 " bnez %1, 0b\n" \
188 "1:\n" \
189 : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
190 : "rJ" (__old), "rJ" (__new) \
191 : "memory"); \
192 break; \
193 case 8: \
194 __asm__ __volatile__ ( \
195 "0: lr.d %0, %2\n" \
196 " bne %0, %z3, 1f\n" \
197 " sc.d %1, %z4, %2\n" \
198 " bnez %1, 0b\n" \
199 "1:\n" \
200 : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
201 : "rJ" (__old), "rJ" (__new) \
202 : "memory"); \
203 break; \
204 default: \
205 BUILD_BUG(); \
206 } \
207 __ret; \
208})
209
210#define cmpxchg_relaxed(ptr, o, n) \
211({ \
212 __typeof__(*(ptr)) _o_ = (o); \
213 __typeof__(*(ptr)) _n_ = (n); \
214 (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
215 _o_, _n_, sizeof(*(ptr))); \
216})
217
218#define __cmpxchg_acquire(ptr, old, new, size) \
67({ \ 219({ \
68 __typeof__(ptr) __ptr = (ptr); \ 220 __typeof__(ptr) __ptr = (ptr); \
69 __typeof__(*(ptr)) __old = (old); \ 221 __typeof__(*(ptr)) __old = (old); \
@@ -73,24 +225,24 @@
73 switch (size) { \ 225 switch (size) { \
74 case 4: \ 226 case 4: \
75 __asm__ __volatile__ ( \ 227 __asm__ __volatile__ ( \
76 "0:" \ 228 "0: lr.w %0, %2\n" \
77 "lr.w" #scb " %0, %2\n" \ 229 " bne %0, %z3, 1f\n" \
78 "bne %0, %z3, 1f\n" \ 230 " sc.w %1, %z4, %2\n" \
79 "sc.w" #lrb " %1, %z4, %2\n" \ 231 " bnez %1, 0b\n" \
80 "bnez %1, 0b\n" \ 232 RISCV_ACQUIRE_BARRIER \
81 "1:" \ 233 "1:\n" \
82 : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ 234 : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
83 : "rJ" (__old), "rJ" (__new) \ 235 : "rJ" (__old), "rJ" (__new) \
84 : "memory"); \ 236 : "memory"); \
85 break; \ 237 break; \
86 case 8: \ 238 case 8: \
87 __asm__ __volatile__ ( \ 239 __asm__ __volatile__ ( \
88 "0:" \ 240 "0: lr.d %0, %2\n" \
89 "lr.d" #scb " %0, %2\n" \ 241 " bne %0, %z3, 1f\n" \
90 "bne %0, %z3, 1f\n" \ 242 " sc.d %1, %z4, %2\n" \
91 "sc.d" #lrb " %1, %z4, %2\n" \ 243 " bnez %1, 0b\n" \
92 "bnez %1, 0b\n" \ 244 RISCV_ACQUIRE_BARRIER \
93 "1:" \ 245 "1:\n" \
94 : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ 246 : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
95 : "rJ" (__old), "rJ" (__new) \ 247 : "rJ" (__old), "rJ" (__new) \
96 : "memory"); \ 248 : "memory"); \
@@ -101,34 +253,131 @@
101 __ret; \ 253 __ret; \
102}) 254})
103 255
104#define cmpxchg(ptr, o, n) \ 256#define cmpxchg_acquire(ptr, o, n) \
105 (__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), .aqrl, .aqrl)) 257({ \
258 __typeof__(*(ptr)) _o_ = (o); \
259 __typeof__(*(ptr)) _n_ = (n); \
260 (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \
261 _o_, _n_, sizeof(*(ptr))); \
262})
106 263
107#define cmpxchg_local(ptr, o, n) \ 264#define __cmpxchg_release(ptr, old, new, size) \
108 (__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), , )) 265({ \
266 __typeof__(ptr) __ptr = (ptr); \
267 __typeof__(*(ptr)) __old = (old); \
268 __typeof__(*(ptr)) __new = (new); \
269 __typeof__(*(ptr)) __ret; \
270 register unsigned int __rc; \
271 switch (size) { \
272 case 4: \
273 __asm__ __volatile__ ( \
274 RISCV_RELEASE_BARRIER \
275 "0: lr.w %0, %2\n" \
276 " bne %0, %z3, 1f\n" \
277 " sc.w %1, %z4, %2\n" \
278 " bnez %1, 0b\n" \
279 "1:\n" \
280 : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
281 : "rJ" (__old), "rJ" (__new) \
282 : "memory"); \
283 break; \
284 case 8: \
285 __asm__ __volatile__ ( \
286 RISCV_RELEASE_BARRIER \
287 "0: lr.d %0, %2\n" \
288 " bne %0, %z3, 1f\n" \
289 " sc.d %1, %z4, %2\n" \
290 " bnez %1, 0b\n" \
291 "1:\n" \
292 : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
293 : "rJ" (__old), "rJ" (__new) \
294 : "memory"); \
295 break; \
296 default: \
297 BUILD_BUG(); \
298 } \
299 __ret; \
300})
301
302#define cmpxchg_release(ptr, o, n) \
303({ \
304 __typeof__(*(ptr)) _o_ = (o); \
305 __typeof__(*(ptr)) _n_ = (n); \
306 (__typeof__(*(ptr))) __cmpxchg_release((ptr), \
307 _o_, _n_, sizeof(*(ptr))); \
308})
309
310#define __cmpxchg(ptr, old, new, size) \
311({ \
312 __typeof__(ptr) __ptr = (ptr); \
313 __typeof__(*(ptr)) __old = (old); \
314 __typeof__(*(ptr)) __new = (new); \
315 __typeof__(*(ptr)) __ret; \
316 register unsigned int __rc; \
317 switch (size) { \
318 case 4: \
319 __asm__ __volatile__ ( \
320 "0: lr.w %0, %2\n" \
321 " bne %0, %z3, 1f\n" \
322 " sc.w.rl %1, %z4, %2\n" \
323 " bnez %1, 0b\n" \
324 " fence rw, rw\n" \
325 "1:\n" \
326 : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
327 : "rJ" (__old), "rJ" (__new) \
328 : "memory"); \
329 break; \
330 case 8: \
331 __asm__ __volatile__ ( \
332 "0: lr.d %0, %2\n" \
333 " bne %0, %z3, 1f\n" \
334 " sc.d.rl %1, %z4, %2\n" \
335 " bnez %1, 0b\n" \
336 " fence rw, rw\n" \
337 "1:\n" \
338 : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
339 : "rJ" (__old), "rJ" (__new) \
340 : "memory"); \
341 break; \
342 default: \
343 BUILD_BUG(); \
344 } \
345 __ret; \
346})
109 347
110#define cmpxchg32(ptr, o, n) \ 348#define cmpxchg(ptr, o, n) \
111({ \ 349({ \
112 BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ 350 __typeof__(*(ptr)) _o_ = (o); \
113 cmpxchg((ptr), (o), (n)); \ 351 __typeof__(*(ptr)) _n_ = (n); \
352 (__typeof__(*(ptr))) __cmpxchg((ptr), \
353 _o_, _n_, sizeof(*(ptr))); \
114}) 354})
115 355
116#define cmpxchg32_local(ptr, o, n) \ 356#define cmpxchg_local(ptr, o, n) \
117({ \ 357 (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
118 BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ 358
119 cmpxchg_local((ptr), (o), (n)); \ 359#define cmpxchg32(ptr, o, n) \
360({ \
361 BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
362 cmpxchg((ptr), (o), (n)); \
120}) 363})
121 364
122#define cmpxchg64(ptr, o, n) \ 365#define cmpxchg32_local(ptr, o, n) \
123({ \ 366({ \
124 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ 367 BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
125 cmpxchg((ptr), (o), (n)); \ 368 cmpxchg_relaxed((ptr), (o), (n)) \
126}) 369})
127 370
128#define cmpxchg64_local(ptr, o, n) \ 371#define cmpxchg64(ptr, o, n) \
129({ \ 372({ \
130 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ 373 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
131 cmpxchg_local((ptr), (o), (n)); \ 374 cmpxchg((ptr), (o), (n)); \
375})
376
377#define cmpxchg64_local(ptr, o, n) \
378({ \
379 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
380 cmpxchg_relaxed((ptr), (o), (n)); \
132}) 381})
133 382
134#endif /* _ASM_RISCV_CMPXCHG_H */ 383#endif /* _ASM_RISCV_CMPXCHG_H */
diff --git a/arch/riscv/include/asm/fence.h b/arch/riscv/include/asm/fence.h
new file mode 100644
index 000000000000..2b443a3a487f
--- /dev/null
+++ b/arch/riscv/include/asm/fence.h
@@ -0,0 +1,12 @@
1#ifndef _ASM_RISCV_FENCE_H
2#define _ASM_RISCV_FENCE_H
3
4#ifdef CONFIG_SMP
5#define RISCV_ACQUIRE_BARRIER "\tfence r , rw\n"
6#define RISCV_RELEASE_BARRIER "\tfence rw, w\n"
7#else
8#define RISCV_ACQUIRE_BARRIER
9#define RISCV_RELEASE_BARRIER
10#endif
11
12#endif /* _ASM_RISCV_FENCE_H */
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
index 2fd27e8ef1fd..8eb26d1ede81 100644
--- a/arch/riscv/include/asm/spinlock.h
+++ b/arch/riscv/include/asm/spinlock.h
@@ -17,6 +17,7 @@
17 17
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <asm/current.h> 19#include <asm/current.h>
20#include <asm/fence.h>
20 21
21/* 22/*
22 * Simple spin lock operations. These provide no fairness guarantees. 23 * Simple spin lock operations. These provide no fairness guarantees.
@@ -28,10 +29,7 @@
28 29
29static inline void arch_spin_unlock(arch_spinlock_t *lock) 30static inline void arch_spin_unlock(arch_spinlock_t *lock)
30{ 31{
31 __asm__ __volatile__ ( 32 smp_store_release(&lock->lock, 0);
32 "amoswap.w.rl x0, x0, %0"
33 : "=A" (lock->lock)
34 :: "memory");
35} 33}
36 34
37static inline int arch_spin_trylock(arch_spinlock_t *lock) 35static inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -39,7 +37,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
39 int tmp = 1, busy; 37 int tmp = 1, busy;
40 38
41 __asm__ __volatile__ ( 39 __asm__ __volatile__ (
42 "amoswap.w.aq %0, %2, %1" 40 " amoswap.w %0, %2, %1\n"
41 RISCV_ACQUIRE_BARRIER
43 : "=r" (busy), "+A" (lock->lock) 42 : "=r" (busy), "+A" (lock->lock)
44 : "r" (tmp) 43 : "r" (tmp)
45 : "memory"); 44 : "memory");
@@ -68,8 +67,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock)
68 "1: lr.w %1, %0\n" 67 "1: lr.w %1, %0\n"
69 " bltz %1, 1b\n" 68 " bltz %1, 1b\n"
70 " addi %1, %1, 1\n" 69 " addi %1, %1, 1\n"
71 " sc.w.aq %1, %1, %0\n" 70 " sc.w %1, %1, %0\n"
72 " bnez %1, 1b\n" 71 " bnez %1, 1b\n"
72 RISCV_ACQUIRE_BARRIER
73 : "+A" (lock->lock), "=&r" (tmp) 73 : "+A" (lock->lock), "=&r" (tmp)
74 :: "memory"); 74 :: "memory");
75} 75}
@@ -82,8 +82,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock)
82 "1: lr.w %1, %0\n" 82 "1: lr.w %1, %0\n"
83 " bnez %1, 1b\n" 83 " bnez %1, 1b\n"
84 " li %1, -1\n" 84 " li %1, -1\n"
85 " sc.w.aq %1, %1, %0\n" 85 " sc.w %1, %1, %0\n"
86 " bnez %1, 1b\n" 86 " bnez %1, 1b\n"
87 RISCV_ACQUIRE_BARRIER
87 : "+A" (lock->lock), "=&r" (tmp) 88 : "+A" (lock->lock), "=&r" (tmp)
88 :: "memory"); 89 :: "memory");
89} 90}
@@ -96,8 +97,9 @@ static inline int arch_read_trylock(arch_rwlock_t *lock)
96 "1: lr.w %1, %0\n" 97 "1: lr.w %1, %0\n"
97 " bltz %1, 1f\n" 98 " bltz %1, 1f\n"
98 " addi %1, %1, 1\n" 99 " addi %1, %1, 1\n"
99 " sc.w.aq %1, %1, %0\n" 100 " sc.w %1, %1, %0\n"
100 " bnez %1, 1b\n" 101 " bnez %1, 1b\n"
102 RISCV_ACQUIRE_BARRIER
101 "1:\n" 103 "1:\n"
102 : "+A" (lock->lock), "=&r" (busy) 104 : "+A" (lock->lock), "=&r" (busy)
103 :: "memory"); 105 :: "memory");
@@ -113,8 +115,9 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
113 "1: lr.w %1, %0\n" 115 "1: lr.w %1, %0\n"
114 " bnez %1, 1f\n" 116 " bnez %1, 1f\n"
115 " li %1, -1\n" 117 " li %1, -1\n"
116 " sc.w.aq %1, %1, %0\n" 118 " sc.w %1, %1, %0\n"
117 " bnez %1, 1b\n" 119 " bnez %1, 1b\n"
120 RISCV_ACQUIRE_BARRIER
118 "1:\n" 121 "1:\n"
119 : "+A" (lock->lock), "=&r" (busy) 122 : "+A" (lock->lock), "=&r" (busy)
120 :: "memory"); 123 :: "memory");
@@ -125,7 +128,8 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
125static inline void arch_read_unlock(arch_rwlock_t *lock) 128static inline void arch_read_unlock(arch_rwlock_t *lock)
126{ 129{
127 __asm__ __volatile__( 130 __asm__ __volatile__(
128 "amoadd.w.rl x0, %1, %0" 131 RISCV_RELEASE_BARRIER
132 " amoadd.w x0, %1, %0\n"
129 : "+A" (lock->lock) 133 : "+A" (lock->lock)
130 : "r" (-1) 134 : "r" (-1)
131 : "memory"); 135 : "memory");
@@ -133,10 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock)
133 137
134static inline void arch_write_unlock(arch_rwlock_t *lock) 138static inline void arch_write_unlock(arch_rwlock_t *lock)
135{ 139{
136 __asm__ __volatile__ ( 140 smp_store_release(&lock->lock, 0);
137 "amoswap.w.rl x0, x0, %0"
138 : "=A" (lock->lock)
139 :: "memory");
140} 141}
141 142
142#endif /* _ASM_RISCV_SPINLOCK_H */ 143#endif /* _ASM_RISCV_SPINLOCK_H */