 arch/arc/include/asm/atomic.h   | 21 +++++++++++++++++++++
 arch/arc/include/asm/bitops.h   | 19 +++++++++++++++++++
 arch/arc/include/asm/cmpxchg.h  | 17 +++++++++++++++++
 arch/arc/include/asm/spinlock.h | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 89 insertions(+), 0 deletions(-)
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 9917a45fc430..20b7dc17979e 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -43,6 +43,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
 { \
         unsigned int temp; \
         \
+        /* \
+         * Explicit full memory barrier needed before/after as \
+         * LLOCK/SCOND themselves don't provide any such semantics \
+         */ \
+        smp_mb(); \
+        \
         __asm__ __volatile__( \
         "1: llock %0, [%1] \n" \
         " " #asm_op " %0, %0, %2 \n" \
@@ -52,6 +58,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
         : "r"(&v->counter), "ir"(i) \
         : "cc"); \
         \
+        smp_mb(); \
+        \
         return temp; \
 }
 
@@ -105,6 +113,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
         unsigned long flags; \
         unsigned long temp; \
         \
+        /* \
+         * spin lock/unlock provides the needed smp_mb() before/after \
+         */ \
         atomic_ops_lock(flags); \
         temp = v->counter; \
         temp c_op i; \
@@ -142,9 +153,19 @@ ATOMIC_OP(and, &=, and)
 #define __atomic_add_unless(v, a, u) \
 ({ \
         int c, old; \
+        \
+        /* \
+         * Explicit full memory barrier needed before/after as \
+         * LLOCK/SCOND themselves don't provide any such semantics \
+         */ \
+        smp_mb(); \
+        \
         c = atomic_read(v); \
         while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\
                 c = old; \
+        \
+        smp_mb(); \
+        \
         c; \
 })
 
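
For reference, a rough sketch of what the LLSC flavour of the generator macro above produces for the "add" case once this patch is applied. The scond/bnz lines and the output constraint are not part of the hunks shown here and are filled in from the unchanged context of the file, so treat this as an illustration rather than the exact preprocessor output. The point is simply that the llock/scond retry loop is now bracketed by smp_mb(), so atomic_add_return() behaves as the full barrier that Documentation/atomic_ops.txt expects of value-returning atomics.

/*
 * Illustrative expansion only, not literal preprocessor output.
 */
static inline int atomic_add_return(int i, atomic_t *v)
{
        unsigned int temp;

        smp_mb();       /* order earlier accesses before the atomic RMW */

        __asm__ __volatile__(
        "1:     llock   %0, [%1]        \n"     /* load-locked v->counter */
        "       add     %0, %0, %2      \n"
        "       scond   %0, [%1]        \n"     /* store-conditional */
        "       bnz     1b              \n"     /* retry if scond failed */
        : "=&r"(temp)
        : "r"(&v->counter), "ir"(i)
        : "cc");

        smp_mb();       /* order the RMW before later accesses */

        return temp;
}
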
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 4051e9525939..624a9d048ca9 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -117,6 +117,12 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
         if (__builtin_constant_p(nr))
                 nr &= 0x1f;
 
+        /*
+         * Explicit full memory barrier needed before/after as
+         * LLOCK/SCOND themselves don't provide any such semantics
+         */
+        smp_mb();
+
         __asm__ __volatile__(
         "1: llock %0, [%2] \n"
         " bset %1, %0, %3 \n"
@@ -126,6 +132,8 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
         : "r"(m), "ir"(nr)
         : "cc");
 
+        smp_mb();
+
         return (old & (1 << nr)) != 0;
 }
 
@@ -139,6 +147,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
         if (__builtin_constant_p(nr))
                 nr &= 0x1f;
 
+        smp_mb();
+
         __asm__ __volatile__(
         "1: llock %0, [%2] \n"
         " bclr %1, %0, %3 \n"
@@ -148,6 +158,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
         : "r"(m), "ir"(nr)
         : "cc");
 
+        smp_mb();
+
         return (old & (1 << nr)) != 0;
 }
 
@@ -161,6 +173,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m)
         if (__builtin_constant_p(nr))
                 nr &= 0x1f;
 
+        smp_mb();
+
         __asm__ __volatile__(
         "1: llock %0, [%2] \n"
         " bxor %1, %0, %3 \n"
@@ -170,6 +184,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m)
         : "r"(m), "ir"(nr)
         : "cc");
 
+        smp_mb();
+
         return (old & (1 << nr)) != 0;
 }
 
@@ -249,6 +265,9 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
         if (__builtin_constant_p(nr))
                 nr &= 0x1f;
 
+        /*
+         * spin lock/unlock provide the needed smp_mb() before/after
+         */
         bitops_lock(flags);
 
         old = *m;
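
The test_and_*_bit() routines carry the same contract: callers expect a full barrier on both sides of the RMW, since these helpers are routinely used to build ad-hoc locks. A hypothetical caller (my_lock_word, shared_data and my_lock_update() are invented names, shown only to illustrate why the smp_mb() pairs above matter):

static unsigned long my_lock_word;      /* bit 0 serves as a lock bit */
static int shared_data;                 /* protected by bit 0 of my_lock_word */

static void my_lock_update(int val)
{
        /*
         * Acquire: the smp_mb() after llock/scond keeps the store to
         * shared_data from moving up above the bit operation.
         */
        while (test_and_set_bit(0, &my_lock_word))
                cpu_relax();

        shared_data = val;              /* must stay inside the locked region */

        /*
         * Release: clear_bit() itself is unordered, so pair it with an
         * explicit barrier before dropping the lock bit.
         */
        smp_mb__before_atomic();
        clear_bit(0, &my_lock_word);
}
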
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index 03cd6894855d..c9b1f461a587 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -10,6 +10,8 @@
 #define __ASM_ARC_CMPXCHG_H
 
 #include <linux/types.h>
+
+#include <asm/barrier.h>
 #include <asm/smp.h>
 
 #ifdef CONFIG_ARC_HAS_LLSC
@@ -19,6 +21,12 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 {
         unsigned long prev;
 
+        /*
+         * Explicit full memory barrier needed before/after as
+         * LLOCK/SCOND themselves don't provide any such semantics
+         */
+        smp_mb();
+
         __asm__ __volatile__(
         "1: llock %0, [%1] \n"
         " brne %0, %2, 2f \n"
@@ -30,6 +38,8 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
           "r"(new) /* can't be "ir". scond can't take limm for "b" */
         : "cc");
 
+        smp_mb();
+
         return prev;
 }
 
@@ -42,6 +52,9 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
         int prev;
         volatile unsigned long *p = ptr;
 
+        /*
+         * spin lock/unlock provide the needed smp_mb() before/after
+         */
         atomic_ops_lock(flags);
         prev = *p;
         if (prev == expected)
@@ -77,12 +90,16 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 
         switch (size) {
         case 4:
+                smp_mb();
+
                 __asm__ __volatile__(
                 " ex  %0, [%1] \n"
                 : "+r"(val)
                 : "r"(ptr)
                 : "memory");
 
+                smp_mb();
+
                 return val;
         }
         return __xchg_bad_pointer();
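
cmpxchg() has the same requirement: callers treat it as a full memory barrier, and __atomic_add_unless() in the atomic.h hunk above is built directly on it. A hypothetical retry loop in that style (take_token() and its semantics are invented purely for illustration):

/* Take one token unless none are left; returns 1 on success, 0 otherwise. */
static int take_token(atomic_t *tokens)
{
        int old, new;

        do {
                old = atomic_read(tokens);
                if (old == 0)
                        return 0;       /* nothing left to take */
                new = old - 1;
        } while (atomic_cmpxchg(tokens, old, new) != old);

        /*
         * The smp_mb() after llock/scond ensures everything done after a
         * successful cmpxchg is ordered against the update above.
         */
        return 1;
}
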
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index b6a8c2dfbe6e..e1651df6a93d 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
         unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
 
+        /*
+         * This smp_mb() is technically superfluous, we only need the one
+         * after the lock for providing the ACQUIRE semantics.
+         * However doing the "right" thing was regressing hackbench
+         * so keeping this, pending further investigation
+         */
+        smp_mb();
+
         __asm__ __volatile__(
         "1: ex  %0, [%1] \n"
         " breq  %0, %2, 1b \n"
         : "+&r" (tmp)
         : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
         : "memory");
+
+        /*
+         * ACQUIRE barrier to ensure load/store after taking the lock
+         * don't "bleed-up" out of the critical section (leak-in is allowed)
+         * http://www.spinics.net/lists/kernel/msg2010409.html
+         *
+         * ARCv2 only has load-load, store-store and all-all barrier
+         * thus need the full all-all barrier
+         */
+        smp_mb();
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
         unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
 
+        smp_mb();
+
         __asm__ __volatile__(
         "1: ex  %0, [%1] \n"
         : "+r" (tmp)
         : "r"(&(lock->slock))
         : "memory");
 
+        smp_mb();
+
         return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
 }
 
@@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
         unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
 
+        /*
+         * RELEASE barrier: given the instructions avail on ARCv2, full barrier
+         * is the only option
+         */
+        smp_mb();
+
         __asm__ __volatile__(
         " ex  %0, [%1] \n"
         : "+r" (tmp)
         : "r"(&(lock->slock))
         : "memory");
 
+        /*
+         * superfluous, but keeping for now - see pairing version in
+         * arch_spin_lock above
+         */
         smp_mb();
 }
 
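
Taken together, the new barriers give arch_spin_lock()/arch_spin_unlock() proper ACQUIRE/RELEASE semantics; the EX instruction on its own orders nothing. A hypothetical user of the lock (demo_lock, shared_count and bump_count() are illustrative names) showing what that ordering buys:

static DEFINE_SPINLOCK(demo_lock);
static int shared_count;                /* protected by demo_lock */

static void bump_count(void)
{
        /*
         * ACQUIRE: the smp_mb() after the EX loop keeps the increment
         * below from "bleeding up" ahead of taking the lock.
         */
        spin_lock(&demo_lock);

        shared_count++;

        /*
         * RELEASE: the smp_mb() before the EX keeps the increment from
         * sinking past the point where the lock is released.
         */
        spin_unlock(&demo_lock);
}
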