diff options
Diffstat (limited to 'arch/x86/include/asm/cmpxchg_32.h')
-rw-r--r-- | arch/x86/include/asm/cmpxchg_32.h | 198 |
1 files changed, 89 insertions, 109 deletions
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 8859e12dd3cf..284a6e8f7ce1 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h | |||
@@ -11,38 +11,42 @@ | |||
11 | extern void __xchg_wrong_size(void); | 11 | extern void __xchg_wrong_size(void); |
12 | 12 | ||
13 | /* | 13 | /* |
14 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | 14 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. |
15 | * Note 2: xchg has side effect, so that attribute volatile is necessary, | 15 | * Since this is generally used to protect other memory information, we |
16 | * but generally the primitive is invalid, *ptr is output argument. --ANK | 16 | * use "asm volatile" and "memory" clobbers to prevent gcc from moving |
17 | * information around. | ||
17 | */ | 18 | */ |
18 | |||
19 | struct __xchg_dummy { | ||
20 | unsigned long a[100]; | ||
21 | }; | ||
22 | #define __xg(x) ((struct __xchg_dummy *)(x)) | ||
23 | |||
24 | #define __xchg(x, ptr, size) \ | 19 | #define __xchg(x, ptr, size) \ |
25 | ({ \ | 20 | ({ \ |
26 | __typeof(*(ptr)) __x = (x); \ | 21 | __typeof(*(ptr)) __x = (x); \ |
27 | switch (size) { \ | 22 | switch (size) { \ |
28 | case 1: \ | 23 | case 1: \ |
29 | asm volatile("xchgb %b0,%1" \ | 24 | { \ |
30 | : "=q" (__x) \ | 25 | volatile u8 *__ptr = (volatile u8 *)(ptr); \ |
31 | : "m" (*__xg(ptr)), "0" (__x) \ | 26 | asm volatile("xchgb %0,%1" \ |
27 | : "=q" (__x), "+m" (*__ptr) \ | ||
28 | : "0" (__x) \ | ||
32 | : "memory"); \ | 29 | : "memory"); \ |
33 | break; \ | 30 | break; \ |
31 | } \ | ||
34 | case 2: \ | 32 | case 2: \ |
35 | asm volatile("xchgw %w0,%1" \ | 33 | { \ |
36 | : "=r" (__x) \ | 34 | volatile u16 *__ptr = (volatile u16 *)(ptr); \ |
37 | : "m" (*__xg(ptr)), "0" (__x) \ | 35 | asm volatile("xchgw %0,%1" \ |
36 | : "=r" (__x), "+m" (*__ptr) \ | ||
37 | : "0" (__x) \ | ||
38 | : "memory"); \ | 38 | : "memory"); \ |
39 | break; \ | 39 | break; \ |
40 | } \ | ||
40 | case 4: \ | 41 | case 4: \ |
42 | { \ | ||
43 | volatile u32 *__ptr = (volatile u32 *)(ptr); \ | ||
41 | asm volatile("xchgl %0,%1" \ | 44 | asm volatile("xchgl %0,%1" \ |
42 | : "=r" (__x) \ | 45 | : "=r" (__x), "+m" (*__ptr) \ |
43 | : "m" (*__xg(ptr)), "0" (__x) \ | 46 | : "0" (__x) \ |
44 | : "memory"); \ | 47 | : "memory"); \ |
45 | break; \ | 48 | break; \ |
49 | } \ | ||
46 | default: \ | 50 | default: \ |
47 | __xchg_wrong_size(); \ | 51 | __xchg_wrong_size(); \ |
48 | } \ | 52 | } \ |
@@ -53,60 +57,33 @@ struct __xchg_dummy { | |||
53 | __xchg((v), (ptr), sizeof(*ptr)) | 57 | __xchg((v), (ptr), sizeof(*ptr)) |
54 | 58 | ||
55 | /* | 59 | /* |
56 | * The semantics of XCHGCMP8B are a bit strange, this is why | 60 | * CMPXCHG8B only writes to the target if we had the previous |
57 | * there is a loop and the loading of %%eax and %%edx has to | 61 | * value in registers, otherwise it acts as a read and gives us the |
58 | * be inside. This inlines well in most cases, the cached | 62 | * "new previous" value. That is why there is a loop. Preloading |
59 | * cost is around ~38 cycles. (in the future we might want | 63 | * EDX:EAX is a performance optimization: in the common case it means |
60 | * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that | 64 | * we need only one locked operation. |
61 | * might have an implicit FPU-save as a cost, so it's not | ||
62 | * clear which path to go.) | ||
63 | * | 65 | * |
64 | * cmpxchg8b must be used with the lock prefix here to allow | 66 | * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very |
65 | * the instruction to be executed atomically, see page 3-102 | 67 | * least an FPU save and/or %cr0.ts manipulation. |
66 | * of the instruction set reference 24319102.pdf. We need | 68 | * |
67 | * the reader side to see the coherent 64bit value. | 69 | * cmpxchg8b must be used with the lock prefix here to allow the |
70 | * instruction to be executed atomically. We need to have the reader | ||
71 | * side to see the coherent 64bit value. | ||
68 | */ | 72 | */ |
69 | static inline void __set_64bit(unsigned long long *ptr, | 73 | static inline void set_64bit(volatile u64 *ptr, u64 value) |
70 | unsigned int low, unsigned int high) | ||
71 | { | 74 | { |
75 | u32 low = value; | ||
76 | u32 high = value >> 32; | ||
77 | u64 prev = *ptr; | ||
78 | |||
72 | asm volatile("\n1:\t" | 79 | asm volatile("\n1:\t" |
73 | "movl (%0), %%eax\n\t" | 80 | LOCK_PREFIX "cmpxchg8b %0\n\t" |
74 | "movl 4(%0), %%edx\n\t" | ||
75 | LOCK_PREFIX "cmpxchg8b (%0)\n\t" | ||
76 | "jnz 1b" | 81 | "jnz 1b" |
77 | : /* no outputs */ | 82 | : "=m" (*ptr), "+A" (prev) |
78 | : "D"(ptr), | 83 | : "b" (low), "c" (high) |
79 | "b"(low), | 84 | : "memory"); |
80 | "c"(high) | ||
81 | : "ax", "dx", "memory"); | ||
82 | } | ||
83 | |||
84 | static inline void __set_64bit_constant(unsigned long long *ptr, | ||
85 | unsigned long long value) | ||
86 | { | ||
87 | __set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32)); | ||
88 | } | ||
89 | |||
90 | #define ll_low(x) *(((unsigned int *)&(x)) + 0) | ||
91 | #define ll_high(x) *(((unsigned int *)&(x)) + 1) | ||
92 | |||
93 | static inline void __set_64bit_var(unsigned long long *ptr, | ||
94 | unsigned long long value) | ||
95 | { | ||
96 | __set_64bit(ptr, ll_low(value), ll_high(value)); | ||
97 | } | 85 | } |
98 | 86 | ||
99 | #define set_64bit(ptr, value) \ | ||
100 | (__builtin_constant_p((value)) \ | ||
101 | ? __set_64bit_constant((ptr), (value)) \ | ||
102 | : __set_64bit_var((ptr), (value))) | ||
103 | |||
104 | #define _set_64bit(ptr, value) \ | ||
105 | (__builtin_constant_p(value) \ | ||
106 | ? __set_64bit(ptr, (unsigned int)(value), \ | ||
107 | (unsigned int)((value) >> 32)) \ | ||
108 | : __set_64bit(ptr, ll_low((value)), ll_high((value)))) | ||
109 | |||
110 | extern void __cmpxchg_wrong_size(void); | 87 | extern void __cmpxchg_wrong_size(void); |
111 | 88 | ||
112 | /* | 89 | /* |
@@ -121,23 +98,32 @@ extern void __cmpxchg_wrong_size(void); | |||
121 | __typeof__(*(ptr)) __new = (new); \ | 98 | __typeof__(*(ptr)) __new = (new); \ |
122 | switch (size) { \ | 99 | switch (size) { \ |
123 | case 1: \ | 100 | case 1: \ |
124 | asm volatile(lock "cmpxchgb %b1,%2" \ | 101 | { \ |
125 | : "=a"(__ret) \ | 102 | volatile u8 *__ptr = (volatile u8 *)(ptr); \ |
126 | : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ | 103 | asm volatile(lock "cmpxchgb %2,%1" \ |
104 | : "=a" (__ret), "+m" (*__ptr) \ | ||
105 | : "q" (__new), "0" (__old) \ | ||
127 | : "memory"); \ | 106 | : "memory"); \ |
128 | break; \ | 107 | break; \ |
108 | } \ | ||
129 | case 2: \ | 109 | case 2: \ |
130 | asm volatile(lock "cmpxchgw %w1,%2" \ | 110 | { \ |
131 | : "=a"(__ret) \ | 111 | volatile u16 *__ptr = (volatile u16 *)(ptr); \ |
132 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | 112 | asm volatile(lock "cmpxchgw %2,%1" \ |
113 | : "=a" (__ret), "+m" (*__ptr) \ | ||
114 | : "r" (__new), "0" (__old) \ | ||
133 | : "memory"); \ | 115 | : "memory"); \ |
134 | break; \ | 116 | break; \ |
117 | } \ | ||
135 | case 4: \ | 118 | case 4: \ |
136 | asm volatile(lock "cmpxchgl %1,%2" \ | 119 | { \ |
137 | : "=a"(__ret) \ | 120 | volatile u32 *__ptr = (volatile u32 *)(ptr); \ |
138 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | 121 | asm volatile(lock "cmpxchgl %2,%1" \ |
122 | : "=a" (__ret), "+m" (*__ptr) \ | ||
123 | : "r" (__new), "0" (__old) \ | ||
139 | : "memory"); \ | 124 | : "memory"); \ |
140 | break; \ | 125 | break; \ |
126 | } \ | ||
141 | default: \ | 127 | default: \ |
142 | __cmpxchg_wrong_size(); \ | 128 | __cmpxchg_wrong_size(); \ |
143 | } \ | 129 | } \ |
@@ -175,32 +161,28 @@ extern void __cmpxchg_wrong_size(void); | |||
175 | (unsigned long long)(n))) | 161 | (unsigned long long)(n))) |
176 | #endif | 162 | #endif |
177 | 163 | ||
178 | static inline unsigned long long __cmpxchg64(volatile void *ptr, | 164 | static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) |
179 | unsigned long long old, | ||
180 | unsigned long long new) | ||
181 | { | 165 | { |
182 | unsigned long long prev; | 166 | u64 prev; |
183 | asm volatile(LOCK_PREFIX "cmpxchg8b %3" | 167 | asm volatile(LOCK_PREFIX "cmpxchg8b %1" |
184 | : "=A"(prev) | 168 | : "=A" (prev), |
185 | : "b"((unsigned long)new), | 169 | "+m" (*ptr) |
186 | "c"((unsigned long)(new >> 32)), | 170 | : "b" ((u32)new), |
187 | "m"(*__xg(ptr)), | 171 | "c" ((u32)(new >> 32)), |
188 | "0"(old) | 172 | "0" (old) |
189 | : "memory"); | 173 | : "memory"); |
190 | return prev; | 174 | return prev; |
191 | } | 175 | } |
192 | 176 | ||
193 | static inline unsigned long long __cmpxchg64_local(volatile void *ptr, | 177 | static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) |
194 | unsigned long long old, | ||
195 | unsigned long long new) | ||
196 | { | 178 | { |
197 | unsigned long long prev; | 179 | u64 prev; |
198 | asm volatile("cmpxchg8b %3" | 180 | asm volatile("cmpxchg8b %1" |
199 | : "=A"(prev) | 181 | : "=A" (prev), |
200 | : "b"((unsigned long)new), | 182 | "+m" (*ptr) |
201 | "c"((unsigned long)(new >> 32)), | 183 | : "b" ((u32)new), |
202 | "m"(*__xg(ptr)), | 184 | "c" ((u32)(new >> 32)), |
203 | "0"(old) | 185 | "0" (old) |
204 | : "memory"); | 186 | : "memory"); |
205 | return prev; | 187 | return prev; |
206 | } | 188 | } |
@@ -264,8 +246,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, | |||
264 | * to simulate the cmpxchg8b on the 80386 and 80486 CPU. | 246 | * to simulate the cmpxchg8b on the 80386 and 80486 CPU. |
265 | */ | 247 | */ |
266 | 248 | ||
267 | extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); | ||
268 | |||
269 | #define cmpxchg64(ptr, o, n) \ | 249 | #define cmpxchg64(ptr, o, n) \ |
270 | ({ \ | 250 | ({ \ |
271 | __typeof__(*(ptr)) __ret; \ | 251 | __typeof__(*(ptr)) __ret; \ |
@@ -283,20 +263,20 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); | |||
283 | __ret; }) | 263 | __ret; }) |
284 | 264 | ||
285 | 265 | ||
286 | 266 | #define cmpxchg64_local(ptr, o, n) \ | |
287 | #define cmpxchg64_local(ptr, o, n) \ | 267 | ({ \ |
288 | ({ \ | 268 | __typeof__(*(ptr)) __ret; \ |
289 | __typeof__(*(ptr)) __ret; \ | 269 | __typeof__(*(ptr)) __old = (o); \ |
290 | if (likely(boot_cpu_data.x86 > 4)) \ | 270 | __typeof__(*(ptr)) __new = (n); \ |
291 | __ret = (__typeof__(*(ptr)))__cmpxchg64_local((ptr), \ | 271 | alternative_io("call cmpxchg8b_emu", \ |
292 | (unsigned long long)(o), \ | 272 | "cmpxchg8b (%%esi)" , \ |
293 | (unsigned long long)(n)); \ | 273 | X86_FEATURE_CX8, \ |
294 | else \ | 274 | "=A" (__ret), \ |
295 | __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \ | 275 | "S" ((ptr)), "0" (__old), \ |
296 | (unsigned long long)(o), \ | 276 | "b" ((unsigned int)__new), \ |
297 | (unsigned long long)(n)); \ | 277 | "c" ((unsigned int)(__new>>32)) \ |
298 | __ret; \ | 278 | : "memory"); \ |
299 | }) | 279 | __ret; }) |
300 | 280 | ||
301 | #endif | 281 | #endif |
302 | 282 | ||