diff options
Diffstat (limited to 'arch/x86/include/asm/percpu.h')
-rw-r--r-- | arch/x86/include/asm/percpu.h | 21 |
1 files changed, 15 insertions, 6 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index b85ade511a53..8ee45167e817 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -263,8 +263,9 @@ do { \ | |||
263 | }) | 263 | }) |
264 | 264 | ||
265 | /* | 265 | /* |
266 | * Beware: xchg on x86 has an implied lock prefix. There will be the cost of | 266 | * xchg is implemented using cmpxchg without a lock prefix. xchg is |
267 | * full lock semantics even though they are not needed. | 267 | * expensive due to the implied lock prefix. The processor cannot prefetch |
268 | * cachelines if xchg is used. | ||
268 | */ | 269 | */ |
269 | #define percpu_xchg_op(var, nval) \ | 270 | #define percpu_xchg_op(var, nval) \ |
270 | ({ \ | 271 | ({ \ |
@@ -272,25 +273,33 @@ do { \ | |||
272 | typeof(var) pxo_new__ = (nval); \ | 273 | typeof(var) pxo_new__ = (nval); \ |
273 | switch (sizeof(var)) { \ | 274 | switch (sizeof(var)) { \ |
274 | case 1: \ | 275 | case 1: \ |
275 | asm("xchgb %2, "__percpu_arg(1) \ | 276 | asm("\n1:mov "__percpu_arg(1)",%%al" \ |
277 | "\n\tcmpxchgb %2, "__percpu_arg(1) \ | ||
278 | "\n\tjnz 1b" \ | ||
276 | : "=a" (pxo_ret__), "+m" (var) \ | 279 | : "=a" (pxo_ret__), "+m" (var) \ |
277 | : "q" (pxo_new__) \ | 280 | : "q" (pxo_new__) \ |
278 | : "memory"); \ | 281 | : "memory"); \ |
279 | break; \ | 282 | break; \ |
280 | case 2: \ | 283 | case 2: \ |
281 | asm("xchgw %2, "__percpu_arg(1) \ | 284 | asm("\n1:mov "__percpu_arg(1)",%%ax" \ |
285 | "\n\tcmpxchgw %2, "__percpu_arg(1) \ | ||
286 | "\n\tjnz 1b" \ | ||
282 | : "=a" (pxo_ret__), "+m" (var) \ | 287 | : "=a" (pxo_ret__), "+m" (var) \ |
283 | : "r" (pxo_new__) \ | 288 | : "r" (pxo_new__) \ |
284 | : "memory"); \ | 289 | : "memory"); \ |
285 | break; \ | 290 | break; \ |
286 | case 4: \ | 291 | case 4: \ |
287 | asm("xchgl %2, "__percpu_arg(1) \ | 292 | asm("\n1:mov "__percpu_arg(1)",%%eax" \ |
293 | "\n\tcmpxchgl %2, "__percpu_arg(1) \ | ||
294 | "\n\tjnz 1b" \ | ||
288 | : "=a" (pxo_ret__), "+m" (var) \ | 295 | : "=a" (pxo_ret__), "+m" (var) \ |
289 | : "r" (pxo_new__) \ | 296 | : "r" (pxo_new__) \ |
290 | : "memory"); \ | 297 | : "memory"); \ |
291 | break; \ | 298 | break; \ |
292 | case 8: \ | 299 | case 8: \ |
293 | asm("xchgq %2, "__percpu_arg(1) \ | 300 | asm("\n1:mov "__percpu_arg(1)",%%rax" \ |
301 | "\n\tcmpxchgq %2, "__percpu_arg(1) \ | ||
302 | "\n\tjnz 1b" \ | ||
294 | : "=a" (pxo_ret__), "+m" (var) \ | 303 | : "=a" (pxo_ret__), "+m" (var) \ |
295 | : "r" (pxo_new__) \ | 304 | : "r" (pxo_new__) \ |
296 | : "memory"); \ | 305 | : "memory"); \ |