aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
author Benjamin Herrenschmidt <benh@kernel.crashing.org>2011-11-15 12:11:27 -0500
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>2011-11-17 00:26:07 -0500
commit b97021f85517552ea8a0d2c1680c1ee4beab6d14 (patch)
tree f8f4c0af8d7a76d405fcae62f2ddecff642cc4e9 /arch/powerpc
parent a9a8f77ac72d6dd3c92ea268291678836f77681c (diff)
powerpc: Fix atomic_xxx_return barrier semantics
The Documentation/memory-barriers.txt document requires that atomic operations that return a value act as a memory barrier both before and after the actual atomic operation. Our current implementation doesn't guarantee this. More specifically, while a load following the isync cannot be issued before stwcx. has completed, that completion doesn't architecturally mean that the result of stwcx. is visible to other processors (or any previous stores for that matter) (typically, the other processors' L1 caches can still hold the old value). This has caused an actual crash in RCU torture testing on Power 7. This fixes it by changing those atomic ops to use new macros instead of RELEASE/ACQUIRE barriers, called ATOMIC_ENTRY and ATOMIC_EXIT barriers, which are then defined respectively to lwsync and sync. I haven't had a chance to measure the performance impact (or rather what I measured with kernel compiles is in the noise, I have yet to find a more precise benchmark). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/include/asm/atomic.h48
-rw-r--r--arch/powerpc/include/asm/bitops.h12
-rw-r--r--arch/powerpc/include/asm/futex.h7
-rw-r--r--arch/powerpc/include/asm/synch.h8
4 files changed, 40 insertions, 35 deletions
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index e2a4c26ad377..02e41b53488d 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -49,13 +49,13 @@ static __inline__ int atomic_add_return(int a, atomic_t *v)
49 int t; 49 int t;
50 50
51 __asm__ __volatile__( 51 __asm__ __volatile__(
52 PPC_RELEASE_BARRIER 52 PPC_ATOMIC_ENTRY_BARRIER
53"1: lwarx %0,0,%2 # atomic_add_return\n\ 53"1: lwarx %0,0,%2 # atomic_add_return\n\
54 add %0,%1,%0\n" 54 add %0,%1,%0\n"
55 PPC405_ERR77(0,%2) 55 PPC405_ERR77(0,%2)
56" stwcx. %0,0,%2 \n\ 56" stwcx. %0,0,%2 \n\
57 bne- 1b" 57 bne- 1b"
58 PPC_ACQUIRE_BARRIER 58 PPC_ATOMIC_EXIT_BARRIER
59 : "=&r" (t) 59 : "=&r" (t)
60 : "r" (a), "r" (&v->counter) 60 : "r" (a), "r" (&v->counter)
61 : "cc", "memory"); 61 : "cc", "memory");
@@ -85,13 +85,13 @@ static __inline__ int atomic_sub_return(int a, atomic_t *v)
85 int t; 85 int t;
86 86
87 __asm__ __volatile__( 87 __asm__ __volatile__(
88 PPC_RELEASE_BARRIER 88 PPC_ATOMIC_ENTRY_BARRIER
89"1: lwarx %0,0,%2 # atomic_sub_return\n\ 89"1: lwarx %0,0,%2 # atomic_sub_return\n\
90 subf %0,%1,%0\n" 90 subf %0,%1,%0\n"
91 PPC405_ERR77(0,%2) 91 PPC405_ERR77(0,%2)
92" stwcx. %0,0,%2 \n\ 92" stwcx. %0,0,%2 \n\
93 bne- 1b" 93 bne- 1b"
94 PPC_ACQUIRE_BARRIER 94 PPC_ATOMIC_EXIT_BARRIER
95 : "=&r" (t) 95 : "=&r" (t)
96 : "r" (a), "r" (&v->counter) 96 : "r" (a), "r" (&v->counter)
97 : "cc", "memory"); 97 : "cc", "memory");
@@ -119,13 +119,13 @@ static __inline__ int atomic_inc_return(atomic_t *v)
119 int t; 119 int t;
120 120
121 __asm__ __volatile__( 121 __asm__ __volatile__(
122 PPC_RELEASE_BARRIER 122 PPC_ATOMIC_ENTRY_BARRIER
123"1: lwarx %0,0,%1 # atomic_inc_return\n\ 123"1: lwarx %0,0,%1 # atomic_inc_return\n\
124 addic %0,%0,1\n" 124 addic %0,%0,1\n"
125 PPC405_ERR77(0,%1) 125 PPC405_ERR77(0,%1)
126" stwcx. %0,0,%1 \n\ 126" stwcx. %0,0,%1 \n\
127 bne- 1b" 127 bne- 1b"
128 PPC_ACQUIRE_BARRIER 128 PPC_ATOMIC_EXIT_BARRIER
129 : "=&r" (t) 129 : "=&r" (t)
130 : "r" (&v->counter) 130 : "r" (&v->counter)
131 : "cc", "xer", "memory"); 131 : "cc", "xer", "memory");
@@ -163,13 +163,13 @@ static __inline__ int atomic_dec_return(atomic_t *v)
163 int t; 163 int t;
164 164
165 __asm__ __volatile__( 165 __asm__ __volatile__(
166 PPC_RELEASE_BARRIER 166 PPC_ATOMIC_ENTRY_BARRIER
167"1: lwarx %0,0,%1 # atomic_dec_return\n\ 167"1: lwarx %0,0,%1 # atomic_dec_return\n\
168 addic %0,%0,-1\n" 168 addic %0,%0,-1\n"
169 PPC405_ERR77(0,%1) 169 PPC405_ERR77(0,%1)
170" stwcx. %0,0,%1\n\ 170" stwcx. %0,0,%1\n\
171 bne- 1b" 171 bne- 1b"
172 PPC_ACQUIRE_BARRIER 172 PPC_ATOMIC_EXIT_BARRIER
173 : "=&r" (t) 173 : "=&r" (t)
174 : "r" (&v->counter) 174 : "r" (&v->counter)
175 : "cc", "xer", "memory"); 175 : "cc", "xer", "memory");
@@ -194,7 +194,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
194 int t; 194 int t;
195 195
196 __asm__ __volatile__ ( 196 __asm__ __volatile__ (
197 PPC_RELEASE_BARRIER 197 PPC_ATOMIC_ENTRY_BARRIER
198"1: lwarx %0,0,%1 # __atomic_add_unless\n\ 198"1: lwarx %0,0,%1 # __atomic_add_unless\n\
199 cmpw 0,%0,%3 \n\ 199 cmpw 0,%0,%3 \n\
200 beq- 2f \n\ 200 beq- 2f \n\
@@ -202,7 +202,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
202 PPC405_ERR77(0,%2) 202 PPC405_ERR77(0,%2)
203" stwcx. %0,0,%1 \n\ 203" stwcx. %0,0,%1 \n\
204 bne- 1b \n" 204 bne- 1b \n"
205 PPC_ACQUIRE_BARRIER 205 PPC_ATOMIC_EXIT_BARRIER
206" subf %0,%2,%0 \n\ 206" subf %0,%2,%0 \n\
2072:" 2072:"
208 : "=&r" (t) 208 : "=&r" (t)
@@ -226,7 +226,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
226 int t; 226 int t;
227 227
228 __asm__ __volatile__( 228 __asm__ __volatile__(
229 PPC_RELEASE_BARRIER 229 PPC_ATOMIC_ENTRY_BARRIER
230"1: lwarx %0,0,%1 # atomic_dec_if_positive\n\ 230"1: lwarx %0,0,%1 # atomic_dec_if_positive\n\
231 cmpwi %0,1\n\ 231 cmpwi %0,1\n\
232 addi %0,%0,-1\n\ 232 addi %0,%0,-1\n\
@@ -234,7 +234,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
234 PPC405_ERR77(0,%1) 234 PPC405_ERR77(0,%1)
235" stwcx. %0,0,%1\n\ 235" stwcx. %0,0,%1\n\
236 bne- 1b" 236 bne- 1b"
237 PPC_ACQUIRE_BARRIER 237 PPC_ATOMIC_EXIT_BARRIER
238 "\n\ 238 "\n\
2392:" : "=&b" (t) 2392:" : "=&b" (t)
240 : "r" (&v->counter) 240 : "r" (&v->counter)
@@ -285,12 +285,12 @@ static __inline__ long atomic64_add_return(long a, atomic64_t *v)
285 long t; 285 long t;
286 286
287 __asm__ __volatile__( 287 __asm__ __volatile__(
288 PPC_RELEASE_BARRIER 288 PPC_ATOMIC_ENTRY_BARRIER
289"1: ldarx %0,0,%2 # atomic64_add_return\n\ 289"1: ldarx %0,0,%2 # atomic64_add_return\n\
290 add %0,%1,%0\n\ 290 add %0,%1,%0\n\
291 stdcx. %0,0,%2 \n\ 291 stdcx. %0,0,%2 \n\
292 bne- 1b" 292 bne- 1b"
293 PPC_ACQUIRE_BARRIER 293 PPC_ATOMIC_EXIT_BARRIER
294 : "=&r" (t) 294 : "=&r" (t)
295 : "r" (a), "r" (&v->counter) 295 : "r" (a), "r" (&v->counter)
296 : "cc", "memory"); 296 : "cc", "memory");
@@ -319,12 +319,12 @@ static __inline__ long atomic64_sub_return(long a, atomic64_t *v)
319 long t; 319 long t;
320 320
321 __asm__ __volatile__( 321 __asm__ __volatile__(
322 PPC_RELEASE_BARRIER 322 PPC_ATOMIC_ENTRY_BARRIER
323"1: ldarx %0,0,%2 # atomic64_sub_return\n\ 323"1: ldarx %0,0,%2 # atomic64_sub_return\n\
324 subf %0,%1,%0\n\ 324 subf %0,%1,%0\n\
325 stdcx. %0,0,%2 \n\ 325 stdcx. %0,0,%2 \n\
326 bne- 1b" 326 bne- 1b"
327 PPC_ACQUIRE_BARRIER 327 PPC_ATOMIC_EXIT_BARRIER
328 : "=&r" (t) 328 : "=&r" (t)
329 : "r" (a), "r" (&v->counter) 329 : "r" (a), "r" (&v->counter)
330 : "cc", "memory"); 330 : "cc", "memory");
@@ -351,12 +351,12 @@ static __inline__ long atomic64_inc_return(atomic64_t *v)
351 long t; 351 long t;
352 352
353 __asm__ __volatile__( 353 __asm__ __volatile__(
354 PPC_RELEASE_BARRIER 354 PPC_ATOMIC_ENTRY_BARRIER
355"1: ldarx %0,0,%1 # atomic64_inc_return\n\ 355"1: ldarx %0,0,%1 # atomic64_inc_return\n\
356 addic %0,%0,1\n\ 356 addic %0,%0,1\n\
357 stdcx. %0,0,%1 \n\ 357 stdcx. %0,0,%1 \n\
358 bne- 1b" 358 bne- 1b"
359 PPC_ACQUIRE_BARRIER 359 PPC_ATOMIC_EXIT_BARRIER
360 : "=&r" (t) 360 : "=&r" (t)
361 : "r" (&v->counter) 361 : "r" (&v->counter)
362 : "cc", "xer", "memory"); 362 : "cc", "xer", "memory");
@@ -393,12 +393,12 @@ static __inline__ long atomic64_dec_return(atomic64_t *v)
393 long t; 393 long t;
394 394
395 __asm__ __volatile__( 395 __asm__ __volatile__(
396 PPC_RELEASE_BARRIER 396 PPC_ATOMIC_ENTRY_BARRIER
397"1: ldarx %0,0,%1 # atomic64_dec_return\n\ 397"1: ldarx %0,0,%1 # atomic64_dec_return\n\
398 addic %0,%0,-1\n\ 398 addic %0,%0,-1\n\
399 stdcx. %0,0,%1\n\ 399 stdcx. %0,0,%1\n\
400 bne- 1b" 400 bne- 1b"
401 PPC_ACQUIRE_BARRIER 401 PPC_ATOMIC_EXIT_BARRIER
402 : "=&r" (t) 402 : "=&r" (t)
403 : "r" (&v->counter) 403 : "r" (&v->counter)
404 : "cc", "xer", "memory"); 404 : "cc", "xer", "memory");
@@ -418,13 +418,13 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
418 long t; 418 long t;
419 419
420 __asm__ __volatile__( 420 __asm__ __volatile__(
421 PPC_RELEASE_BARRIER 421 PPC_ATOMIC_ENTRY_BARRIER
422"1: ldarx %0,0,%1 # atomic64_dec_if_positive\n\ 422"1: ldarx %0,0,%1 # atomic64_dec_if_positive\n\
423 addic. %0,%0,-1\n\ 423 addic. %0,%0,-1\n\
424 blt- 2f\n\ 424 blt- 2f\n\
425 stdcx. %0,0,%1\n\ 425 stdcx. %0,0,%1\n\
426 bne- 1b" 426 bne- 1b"
427 PPC_ACQUIRE_BARRIER 427 PPC_ATOMIC_EXIT_BARRIER
428 "\n\ 428 "\n\
4292:" : "=&r" (t) 4292:" : "=&r" (t)
430 : "r" (&v->counter) 430 : "r" (&v->counter)
@@ -450,14 +450,14 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
450 long t; 450 long t;
451 451
452 __asm__ __volatile__ ( 452 __asm__ __volatile__ (
453 PPC_RELEASE_BARRIER 453 PPC_ATOMIC_ENTRY_BARRIER
454"1: ldarx %0,0,%1 # __atomic_add_unless\n\ 454"1: ldarx %0,0,%1 # __atomic_add_unless\n\
455 cmpd 0,%0,%3 \n\ 455 cmpd 0,%0,%3 \n\
456 beq- 2f \n\ 456 beq- 2f \n\
457 add %0,%2,%0 \n" 457 add %0,%2,%0 \n"
458" stdcx. %0,0,%1 \n\ 458" stdcx. %0,0,%1 \n\
459 bne- 1b \n" 459 bne- 1b \n"
460 PPC_ACQUIRE_BARRIER 460 PPC_ATOMIC_EXIT_BARRIER
461" subf %0,%2,%0 \n\ 461" subf %0,%2,%0 \n\
4622:" 4622:"
463 : "=&r" (t) 463 : "=&r" (t)
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index e137afcc10fa..efdc92618b38 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -124,14 +124,14 @@ static __inline__ unsigned long fn( \
124 return (old & mask); \ 124 return (old & mask); \
125} 125}
126 126
127DEFINE_TESTOP(test_and_set_bits, or, PPC_RELEASE_BARRIER, 127DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER,
128 PPC_ACQUIRE_BARRIER, 0) 128 PPC_ATOMIC_EXIT_BARRIER, 0)
129DEFINE_TESTOP(test_and_set_bits_lock, or, "", 129DEFINE_TESTOP(test_and_set_bits_lock, or, "",
130 PPC_ACQUIRE_BARRIER, 1) 130 PPC_ACQUIRE_BARRIER, 1)
131DEFINE_TESTOP(test_and_clear_bits, andc, PPC_RELEASE_BARRIER, 131DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER,
132 PPC_ACQUIRE_BARRIER, 0) 132 PPC_ATOMIC_EXIT_BARRIER, 0)
133DEFINE_TESTOP(test_and_change_bits, xor, PPC_RELEASE_BARRIER, 133DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
134 PPC_ACQUIRE_BARRIER, 0) 134 PPC_ATOMIC_EXIT_BARRIER, 0)
135 135
136static __inline__ int test_and_set_bit(unsigned long nr, 136static __inline__ int test_and_set_bit(unsigned long nr,
137 volatile unsigned long *addr) 137 volatile unsigned long *addr)
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index c94e4a3fe2ef..2a9cf845473b 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -11,12 +11,13 @@
11 11
12#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ 12#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
13 __asm__ __volatile ( \ 13 __asm__ __volatile ( \
14 PPC_RELEASE_BARRIER \ 14 PPC_ATOMIC_ENTRY_BARRIER \
15"1: lwarx %0,0,%2\n" \ 15"1: lwarx %0,0,%2\n" \
16 insn \ 16 insn \
17 PPC405_ERR77(0, %2) \ 17 PPC405_ERR77(0, %2) \
18"2: stwcx. %1,0,%2\n" \ 18"2: stwcx. %1,0,%2\n" \
19 "bne- 1b\n" \ 19 "bne- 1b\n" \
20 PPC_ATOMIC_EXIT_BARRIER \
20 "li %1,0\n" \ 21 "li %1,0\n" \
21"3: .section .fixup,\"ax\"\n" \ 22"3: .section .fixup,\"ax\"\n" \
22"4: li %1,%3\n" \ 23"4: li %1,%3\n" \
@@ -92,14 +93,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
92 return -EFAULT; 93 return -EFAULT;
93 94
94 __asm__ __volatile__ ( 95 __asm__ __volatile__ (
95 PPC_RELEASE_BARRIER 96 PPC_ATOMIC_ENTRY_BARRIER
96"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ 97"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
97 cmpw 0,%1,%4\n\ 98 cmpw 0,%1,%4\n\
98 bne- 3f\n" 99 bne- 3f\n"
99 PPC405_ERR77(0,%3) 100 PPC405_ERR77(0,%3)
100"2: stwcx. %5,0,%3\n\ 101"2: stwcx. %5,0,%3\n\
101 bne- 1b\n" 102 bne- 1b\n"
102 PPC_ACQUIRE_BARRIER 103 PPC_ATOMIC_EXIT_BARRIER
103"3: .section .fixup,\"ax\"\n\ 104"3: .section .fixup,\"ax\"\n\
1044: li %0,%6\n\ 1054: li %0,%6\n\
105 b 3b\n\ 106 b 3b\n\
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
index 87878c68d1c2..e682a7143edb 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch.h
@@ -42,11 +42,15 @@ static inline void isync(void)
42 START_LWSYNC_SECTION(97); \ 42 START_LWSYNC_SECTION(97); \
43 isync; \ 43 isync; \
44 MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup); 44 MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
45#define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER) 45#define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
46#define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n" 46#define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n"
47#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n"
48#define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n"
47#else 49#else
48#define PPC_ACQUIRE_BARRIER 50#define PPC_ACQUIRE_BARRIER
49#define PPC_RELEASE_BARRIER 51#define PPC_RELEASE_BARRIER
52#define PPC_ATOMIC_ENTRY_BARRIER
53#define PPC_ATOMIC_EXIT_BARRIER
50#endif 54#endif
51 55
52#endif /* __KERNEL__ */ 56#endif /* __KERNEL__ */