author		Will Deacon <will.deacon@arm.com>	2014-02-04 07:29:12 -0500
committer	Catalin Marinas <catalin.marinas@arm.com>	2014-02-07 11:45:43 -0500
commit		8e86f0b409a44193f1587e87b69c5dcf8f65be67 (patch)
tree		6d8bd30d19ce3a392428d86cfd93003057d6aeb8
parent		4a7ac12eedd190cdf071e61145defa73df1675c0 (diff)
arm64: atomics: fix use of acquire + release for full barrier semantics
Linux requires a number of atomic operations to provide full barrier
semantics, that is no memory accesses after the operation can be
observed before any accesses up to and including the operation in
program order.

On arm64, these operations have been incorrectly implemented as follows:

	// A, B, C are independent memory locations

	<Access [A]>

	// atomic_op (B)
1:	ldaxr	x0, [B]		// Exclusive load with acquire
	<op(B)>
	stlxr	w1, x0, [B]	// Exclusive store with release
	cbnz	w1, 1b

	<Access [C]>

The assumption here being that two half barriers are equivalent to a
full barrier, so the only permitted ordering would be A -> B -> C
(where B is the atomic operation involving both a load and a store).

Unfortunately, this is not the case by the letter of the architecture
and, in fact, the accesses to A and C are permitted to pass their
nearest half barrier resulting in orderings such as Bl -> A -> C -> Bs
or Bl -> C -> A -> Bs (where Bl is the load-acquire on B and Bs is the
store-release on B). This is a clear violation of the full barrier
requirement.

The simple way to fix this is to implement the same algorithm as ARMv7
using explicit barriers:

	<Access [A]>

	// atomic_op (B)
	dmb	ish		// Full barrier
1:	ldxr	x0, [B]		// Exclusive load
	<op(B)>
	stxr	w1, x0, [B]	// Exclusive store
	cbnz	w1, 1b
	dmb	ish		// Full barrier

	<Access [C]>

but this has the undesirable effect of introducing *two* full barrier
instructions. A better approach is actually the following,
non-intuitive sequence:

	<Access [A]>

	// atomic_op (B)
1:	ldxr	x0, [B]		// Exclusive load
	<op(B)>
	stlxr	w1, x0, [B]	// Exclusive store with release
	cbnz	w1, 1b
	dmb	ish		// Full barrier

	<Access [C]>

The simple observations here are:

  - The dmb ensures that no subsequent accesses (e.g. the access to C)
    can enter or pass the atomic sequence.

  - The dmb also ensures that no prior accesses (e.g. the access to A)
    can pass the atomic sequence.

  - Therefore, no prior access can pass a subsequent access, or
    vice-versa (i.e. A is strictly ordered before C).

  - The stlxr ensures that no prior access can pass the store component
    of the atomic operation.

The only tricky part remaining is the ordering between the ldxr and the
access to A, since the absence of the first dmb means that we're now
permitting re-ordering between the ldxr and any prior accesses.

From an (arbitrary) observer's point of view, there are two scenarios:

  1. We have observed the ldxr. This means that if we perform a store
     to [B], the ldxr will still return older data. If we can observe
     the ldxr, then we can potentially observe the permitted
     re-ordering with the access to A, which is clearly an issue when
     compared to the dmb variant of the code. Thankfully, the
     exclusive monitor will save us here since it will be cleared as a
     result of the store and the ldxr will retry. Notice that any use
     of a later memory observation to imply observation of the ldxr
     will also imply observation of the access to A, since the
     stlxr/dmb ensure strict ordering.

  2. We have not observed the ldxr. This means we can perform a store
     and influence the later ldxr. However, that doesn't actually tell
     us anything about the access to [A], so we've not lost anything
     here either when compared to the dmb variant.

This patch implements this solution for our barriered atomic
operations, ensuring that we satisfy the full barrier requirements
where they are needed.

Cc: <stable@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
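[Editor's note] To make the failure mode concrete, here is a hypothetical
message-passing example (not part of the patch; the variable and function
names are illustrative) that depends on the full-barrier semantics
described above:

	/* Illustrative sketch only -- not from the patch. */
	static int data;			/* access "A" */
	static atomic_t seq = ATOMIC_INIT(0);	/* location "B" */
	static int ready;			/* access "C" */

	void producer(void)
	{
		data = 42;			/* A */
		atomic_add_return(1, &seq);	/* B: relied on as a full barrier */
		ACCESS_ONCE(ready) = 1;		/* C */
	}

	void consumer(void)
	{
		if (ACCESS_ONCE(ready)) {	/* C observed... */
			smp_rmb();
			BUG_ON(data != 42);	/* ...so A must be visible too */
		}
	}

With the broken ldaxr/stlxr sequence, the permitted ordering
Bl -> C -> A -> Bs allows the store to ready to become visible before the
store to data, so the consumer can trip the BUG_ON(); with the trailing
dmb ish it cannot.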
-rw-r--r--	arch/arm64/include/asm/atomic.h		29
-rw-r--r--	arch/arm64/include/asm/cmpxchg.h	9
-rw-r--r--	arch/arm64/include/asm/futex.h		6
-rw-r--r--	arch/arm64/kernel/kuser32.S		6
-rw-r--r--	arch/arm64/lib/bitops.S			3
5 files changed, 35 insertions(+), 18 deletions(-)
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 01de5aaa3edc..e32893e005d4 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -64,7 +64,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	int result;
 
 	asm volatile("// atomic_add_return\n"
-"1:	ldaxr	%w0, %2\n"
+"1:	ldxr	%w0, %2\n"
 "	add	%w0, %w0, %w3\n"
 "	stlxr	%w1, %w0, %2\n"
 "	cbnz	%w1, 1b"
@@ -72,6 +72,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
72 : "Ir" (i) 72 : "Ir" (i)
73 : "cc", "memory"); 73 : "cc", "memory");
74 74
75 smp_mb();
75 return result; 76 return result;
76} 77}
77 78
@@ -96,7 +97,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	int result;
 
 	asm volatile("// atomic_sub_return\n"
-"1:	ldaxr	%w0, %2\n"
+"1:	ldxr	%w0, %2\n"
 "	sub	%w0, %w0, %w3\n"
 "	stlxr	%w1, %w0, %2\n"
 "	cbnz	%w1, 1b"
@@ -104,6 +105,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
104 : "Ir" (i) 105 : "Ir" (i)
105 : "cc", "memory"); 106 : "cc", "memory");
106 107
108 smp_mb();
107 return result; 109 return result;
108} 110}
109 111
@@ -112,17 +114,20 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 	unsigned long tmp;
 	int oldval;
 
+	smp_mb();
+
 	asm volatile("// atomic_cmpxchg\n"
-"1:	ldaxr	%w1, %2\n"
+"1:	ldxr	%w1, %2\n"
 "	cmp	%w1, %w3\n"
 "	b.ne	2f\n"
-"	stlxr	%w0, %w4, %2\n"
+"	stxr	%w0, %w4, %2\n"
 "	cbnz	%w0, 1b\n"
 "2:"
 	: "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter)
 	: "Ir" (old), "r" (new)
 	: "cc", "memory");
 
+	smp_mb();
 	return oldval;
 }
 
@@ -183,7 +188,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v)
 	unsigned long tmp;
 
 	asm volatile("// atomic64_add_return\n"
-"1:	ldaxr	%0, %2\n"
+"1:	ldxr	%0, %2\n"
 "	add	%0, %0, %3\n"
 "	stlxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b"
@@ -191,6 +196,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v)
191 : "Ir" (i) 196 : "Ir" (i)
192 : "cc", "memory"); 197 : "cc", "memory");
193 198
199 smp_mb();
194 return result; 200 return result;
195} 201}
196 202
@@ -215,7 +221,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v)
 	unsigned long tmp;
 
 	asm volatile("// atomic64_sub_return\n"
-"1:	ldaxr	%0, %2\n"
+"1:	ldxr	%0, %2\n"
 "	sub	%0, %0, %3\n"
 "	stlxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b"
@@ -223,6 +229,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v)
223 : "Ir" (i) 229 : "Ir" (i)
224 : "cc", "memory"); 230 : "cc", "memory");
225 231
232 smp_mb();
226 return result; 233 return result;
227} 234}
228 235
@@ -231,17 +238,20 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
 	long oldval;
 	unsigned long res;
 
+	smp_mb();
+
 	asm volatile("// atomic64_cmpxchg\n"
-"1:	ldaxr	%1, %2\n"
+"1:	ldxr	%1, %2\n"
 "	cmp	%1, %3\n"
 "	b.ne	2f\n"
-"	stlxr	%w0, %4, %2\n"
+"	stxr	%w0, %4, %2\n"
 "	cbnz	%w0, 1b\n"
 "2:"
 	: "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter)
 	: "Ir" (old), "r" (new)
 	: "cc", "memory");
 
+	smp_mb();
 	return oldval;
 }
 
@@ -253,11 +263,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	unsigned long tmp;
 
 	asm volatile("// atomic64_dec_if_positive\n"
-"1:	ldaxr	%0, %2\n"
+"1:	ldxr	%0, %2\n"
 "	subs	%0, %0, #1\n"
 "	b.mi	2f\n"
 "	stlxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b\n"
+"	dmb	ish\n"
 "2:"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
 	:
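[Editor's note] All of the value-returning routines above instantiate the
same template: the load loses its acquire half (ldaxr -> ldxr) and a full
barrier is added after the loop (an smp_mb() after the asm, or an inline
dmb ish in atomic64_dec_if_positive). The cmpxchg variants additionally
take an smp_mb() *before* the loop and drop the release from the store
(stlxr -> stxr), since a failed comparison performs no store at all and
so cannot rely on a store-release to order prior accesses. A condensed
sketch of the resulting pattern (the helper name is hypothetical; it
simply mirrors the atomic_add_return() hunk above):

	/* Hypothetical consolidation of the fixed pattern; illustrative only. */
	static inline int fully_ordered_add_return(int i, atomic_t *v)
	{
		unsigned long tmp;
		int result;

		asm volatile("// fully_ordered_add_return\n"
	"1:	ldxr	%w0, %2\n"	/* plain exclusive load: no acquire half */
	"	add	%w0, %w0, %w3\n"
	"	stlxr	%w1, %w0, %2\n"	/* store-release: prior accesses cannot pass it */
	"	cbnz	%w1, 1b"
		: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
		: "Ir" (i)
		: "cc", "memory");

		smp_mb();	/* trailing dmb ish completes the full barrier */
		return result;
	}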
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 56166d7f4a25..189390ce8653 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -29,7 +29,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	switch (size) {
 	case 1:
 		asm volatile("// __xchg1\n"
-		"1:	ldaxrb	%w0, %2\n"
+		"1:	ldxrb	%w0, %2\n"
 		"	stlxrb	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
@@ -38,7 +38,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		break;
 	case 2:
 		asm volatile("// __xchg2\n"
-		"1:	ldaxrh	%w0, %2\n"
+		"1:	ldxrh	%w0, %2\n"
 		"	stlxrh	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		: "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
@@ -47,7 +47,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		break;
 	case 4:
 		asm volatile("// __xchg4\n"
-		"1:	ldaxr	%w0, %2\n"
+		"1:	ldxr	%w0, %2\n"
 		"	stlxr	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		: "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
@@ -56,7 +56,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		break;
 	case 8:
 		asm volatile("// __xchg8\n"
-		"1:	ldaxr	%0, %2\n"
+		"1:	ldxr	%0, %2\n"
 		"	stlxr	%w1, %3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		: "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
@@ -67,6 +67,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		BUILD_BUG();
 	}
 
+	smp_mb();
 	return ret;
 }
 
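[Editor's note] __xchg() gets the same treatment: the acquire loads become
plain exclusives and a single trailing smp_mb() serves all four sizes.
Callers rely on xchg() being fully ordered (see
Documentation/memory-barriers.txt); a hypothetical illustration (the lock
type and function names below are made up):

	/* Illustrative only: a toy test-and-set lock built on xchg(). */
	struct toy_lock {
		unsigned long locked;
	};

	static void toy_lock_acquire(struct toy_lock *l)
	{
		/*
		 * xchg() must be a full barrier: accesses inside the
		 * critical section must not be observable before the
		 * lock is seen as taken.
		 */
		while (xchg(&l->locked, 1))
			cpu_relax();
	}

	static void toy_lock_release(struct toy_lock *l)
	{
		smp_mb();	/* critical section stays before the release */
		ACCESS_ONCE(l->locked) = 0;
	}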
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 78cc3aba5d69..572193d0005d 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -24,10 +24,11 @@
 
 #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg)	\
 	asm volatile(							\
-"1:	ldaxr	%w1, %2\n"						\
+"1:	ldxr	%w1, %2\n"						\
 	insn "\n"							\
 "2:	stlxr	%w3, %w0, %2\n"						\
 "	cbnz	%w3, 1b\n"						\
+"	dmb	ish\n"							\
 "3:\n"									\
 "	.pushsection .fixup,\"ax\"\n"					\
 "	.align	2\n"							\
@@ -111,11 +112,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		return -EFAULT;
 
 	asm volatile("// futex_atomic_cmpxchg_inatomic\n"
-"1:	ldaxr	%w1, %2\n"
+"1:	ldxr	%w1, %2\n"
 "	sub	%w3, %w1, %w4\n"
 "	cbnz	%w3, 3f\n"
 "2:	stlxr	%w3, %w5, %2\n"
 "	cbnz	%w3, 1b\n"
+"	dmb	ish\n"
 "3:\n"
 "	.pushsection .fixup,\"ax\"\n"
 "4:	mov	%w0, %w6\n"
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 63c48ffdf230..7787208e8cc6 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -38,12 +38,13 @@ __kuser_cmpxchg64: // 0xffff0f60
 	.inst	0xe92d00f0	//	push	{r4, r5, r6, r7}
 	.inst	0xe1c040d0	//	ldrd	r4, r5, [r0]
 	.inst	0xe1c160d0	//	ldrd	r6, r7, [r1]
-	.inst	0xe1b20e9f	// 1:	ldaexd	r0, r1, [r2]
+	.inst	0xe1b20f9f	// 1:	ldrexd	r0, r1, [r2]
 	.inst	0xe0303004	//	eors	r3, r0, r4
 	.inst	0x00313005	//	eoreqs	r3, r1, r5
 	.inst	0x01a23e96	//	stlexdeq r3, r6, [r2]
 	.inst	0x03330001	//	teqeq	r3, #1
 	.inst	0x0afffff9	//	beq	1b
+	.inst	0xf57ff05b	//	dmb	ish
 	.inst	0xe2730000	//	rsbs	r0, r3, #0
 	.inst	0xe8bd00f0	//	pop	{r4, r5, r6, r7}
 	.inst	0xe12fff1e	//	bx	lr
@@ -55,11 +56,12 @@ __kuser_memory_barrier: // 0xffff0fa0
 
 	.align	5
 __kuser_cmpxchg:			// 0xffff0fc0
-	.inst	0xe1923e9f	// 1:	ldaex	r3, [r2]
+	.inst	0xe1923f9f	// 1:	ldrex	r3, [r2]
 	.inst	0xe0533000	//	subs	r3, r3, r0
 	.inst	0x01823e91	//	stlexeq	r3, r1, [r2]
 	.inst	0x03330001	//	teqeq	r3, #1
 	.inst	0x0afffffa	//	beq	1b
+	.inst	0xf57ff05b	//	dmb	ish
 	.inst	0xe2730000	//	rsbs	r0, r3, #0
 	.inst	0xe12fff1e	//	bx	lr
 
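[Editor's note] The kuser helpers are entered directly by 32-bit user code
at fixed addresses, so the fix must be applied to the hand-encoded
instructions: the acquire forms (ldaexd/ldaex) become plain exclusives
(ldrexd/ldrex) and the encoding 0xf57ff05b (dmb ish) is appended after the
loop. For context, AArch32 userspace invokes __kuser_cmpxchg as documented
in Documentation/arm/kernel_user_helpers.txt; the wrapper below is
illustrative:

	/* Illustrative AArch32 userspace wrapper around the helper. */
	typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
	#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)

	static int user_atomic_inc(volatile int *counter)
	{
		int old;

		do {
			old = *counter;
			/* returns 0 on success; now also a full barrier */
		} while (__kuser_cmpxchg(old, old + 1, counter) != 0);

		return old + 1;
	}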
diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S
index e5db797790d3..7dac371cc9a2 100644
--- a/arch/arm64/lib/bitops.S
+++ b/arch/arm64/lib/bitops.S
@@ -46,11 +46,12 @@ ENTRY( \name )
 	mov	x2, #1
 	add	x1, x1, x0, lsr #3	// Get word offset
 	lsl	x4, x2, x3		// Create mask
-1:	ldaxr	x2, [x1]
+1:	ldxr	x2, [x1]
 	lsr	x0, x2, x3		// Save old value of bit
 	\instr	x2, x2, x4		// toggle bit
 	stlxr	w5, x2, [x1]
 	cbnz	w5, 1b
+	dmb	ish
 	and	x0, x0, #1
 3:	ret
 ENDPROC(\name	)
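[Editor's note] This bitops template generates the
test_and_{set,clear,change}_bit() family, which the kernel likewise
requires to be fully ordered. A small illustrative caller (the flag bit
and function names are hypothetical):

	/* Illustrative only: a claim flag relying on fully ordered bitops. */
	#define MY_BUSY_BIT	0		/* hypothetical flag bit */

	static unsigned long my_flags;

	static int try_claim(void)
	{
		/* Full barrier: later accesses cannot leak above the bit set. */
		if (test_and_set_bit(MY_BUSY_BIT, &my_flags))
			return 0;		/* already claimed */
		return 1;
	}

	static void release_claim(void)
	{
		smp_mb__before_clear_bit();	/* order the critical work first */
		clear_bit(MY_BUSY_BIT, &my_flags);
	}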