diff options
author | Christoph Lameter <cl@linux.com> | 2010-12-14 11:28:44 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2010-12-18 09:54:04 -0500 |
commit | 7296e08abac0a22a2534a4f6e493c764f2c77583 (patch) | |
tree | 3aa79d3046bf227b14d1f718fa097f87f6257fb9 | |
parent | 2b7124428561c7c3cfa4a58cc4c6feea53f3148e (diff) |
x86: this_cpu_cmpxchg and this_cpu_xchg operations
Provide support as far as the hardware capabilities of the x86 cpus
allow.
Define CONFIG_CMPXCHG_LOCAL in Kconfig.cpu to allow core code to test for
fast cpuops implementations.
V1->V2:
- Take out the definition for this_cpu_cmpxchg_8 and move it into
a separate patch.
tj: - Reordered ops to better follow this_cpu_* organization.
- Renamed macro temp variables similar to their existing
neighbours.
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- | arch/x86/Kconfig.cpu | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/percpu.h | 107 |
2 files changed, 109 insertions, 1 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ac9069890cd..15588a0ef466 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT | |||
310 | config X86_CMPXCHG | 310 | config X86_CMPXCHG |
311 | def_bool X86_64 || (X86_32 && !M386) | 311 | def_bool X86_64 || (X86_32 && !M386) |
312 | 312 | ||
313 | config CMPXCHG_LOCAL | ||
314 | def_bool X86_64 || (X86_32 && !M386) | ||
315 | |||
313 | config X86_L1_CACHE_SHIFT | 316 | config X86_L1_CACHE_SHIFT |
314 | int | 317 | int |
315 | default "7" if MPENTIUM4 || MPSC | 318 | default "7" if MPENTIUM4 || MPSC |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index dd0cd4b6a76f..b85ade511a53 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -263,6 +263,83 @@ do { \ | |||
263 | }) | 263 | }) |
264 | 264 | ||
265 | /* | 265 | /* |
266 | * Beware: xchg on x86 has an implied lock prefix. There will be the cost of | ||
267 | * full lock semantics even though they are not needed. | ||
268 | */ | ||
269 | #define percpu_xchg_op(var, nval) \ | ||
270 | ({ \ | ||
271 | typeof(var) pxo_ret__; \ | ||
272 | typeof(var) pxo_new__ = (nval); \ | ||
273 | switch (sizeof(var)) { \ | ||
274 | case 1: \ | ||
275 | asm("xchgb %2, "__percpu_arg(1) \ | ||
276 | : "=a" (pxo_ret__), "+m" (var) \ | ||
277 | : "q" (pxo_new__) \ | ||
278 | : "memory"); \ | ||
279 | break; \ | ||
280 | case 2: \ | ||
281 | asm("xchgw %2, "__percpu_arg(1) \ | ||
282 | : "=a" (pxo_ret__), "+m" (var) \ | ||
283 | : "r" (pxo_new__) \ | ||
284 | : "memory"); \ | ||
285 | break; \ | ||
286 | case 4: \ | ||
287 | asm("xchgl %2, "__percpu_arg(1) \ | ||
288 | : "=a" (pxo_ret__), "+m" (var) \ | ||
289 | : "r" (pxo_new__) \ | ||
290 | : "memory"); \ | ||
291 | break; \ | ||
292 | case 8: \ | ||
293 | asm("xchgq %2, "__percpu_arg(1) \ | ||
294 | : "=a" (pxo_ret__), "+m" (var) \ | ||
295 | : "r" (pxo_new__) \ | ||
296 | : "memory"); \ | ||
297 | break; \ | ||
298 | default: __bad_percpu_size(); \ | ||
299 | } \ | ||
300 | pxo_ret__; \ | ||
301 | }) | ||
302 | |||
303 | /* | ||
304 | * cmpxchg has no such implied lock semantics; as a result, it is much | ||
305 | * more efficient for cpu local operations. | ||
306 | */ | ||
307 | #define percpu_cmpxchg_op(var, oval, nval) \ | ||
308 | ({ \ | ||
309 | typeof(var) pco_ret__; \ | ||
310 | typeof(var) pco_old__ = (oval); \ | ||
311 | typeof(var) pco_new__ = (nval); \ | ||
312 | switch (sizeof(var)) { \ | ||
313 | case 1: \ | ||
314 | asm("cmpxchgb %2, "__percpu_arg(1) \ | ||
315 | : "=a" (pco_ret__), "+m" (var) \ | ||
316 | : "q" (pco_new__), "0" (pco_old__) \ | ||
317 | : "memory"); \ | ||
318 | break; \ | ||
319 | case 2: \ | ||
320 | asm("cmpxchgw %2, "__percpu_arg(1) \ | ||
321 | : "=a" (pco_ret__), "+m" (var) \ | ||
322 | : "r" (pco_new__), "0" (pco_old__) \ | ||
323 | : "memory"); \ | ||
324 | break; \ | ||
325 | case 4: \ | ||
326 | asm("cmpxchgl %2, "__percpu_arg(1) \ | ||
327 | : "=a" (pco_ret__), "+m" (var) \ | ||
328 | : "r" (pco_new__), "0" (pco_old__) \ | ||
329 | : "memory"); \ | ||
330 | break; \ | ||
331 | case 8: \ | ||
332 | asm("cmpxchgq %2, "__percpu_arg(1) \ | ||
333 | : "=a" (pco_ret__), "+m" (var) \ | ||
334 | : "r" (pco_new__), "0" (pco_old__) \ | ||
335 | : "memory"); \ | ||
336 | break; \ | ||
337 | default: __bad_percpu_size(); \ | ||
338 | } \ | ||
339 | pco_ret__; \ | ||
340 | }) | ||
341 | |||
342 | /* | ||
266 | * percpu_read() makes gcc load the percpu variable every time it is | 343 | * percpu_read() makes gcc load the percpu variable every time it is |
267 | * accessed while percpu_read_stable() allows the value to be cached. | 344 | * accessed while percpu_read_stable() allows the value to be cached. |
268 | * percpu_read_stable() is more efficient and can be used if its value | 345 | * percpu_read_stable() is more efficient and can be used if its value |
@@ -300,6 +377,12 @@ do { \ | |||
300 | #define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) | 377 | #define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) |
301 | #define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | 378 | #define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) |
302 | #define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | 379 | #define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) |
380 | /* | ||
381 | * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much | ||
382 | * faster than an xchg with forced lock semantics. | ||
383 | */ | ||
384 | #define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
385 | #define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
303 | 386 | ||
304 | #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 387 | #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
305 | #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 388 | #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
@@ -319,6 +402,11 @@ do { \ | |||
319 | #define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) | 402 | #define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) |
320 | #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | 403 | #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) |
321 | #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | 404 | #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) |
405 | #define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) | ||
406 | #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | ||
407 | #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | ||
408 | #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
409 | #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
322 | 410 | ||
323 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) | 411 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
324 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) | 412 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
@@ -332,15 +420,32 @@ do { \ | |||
332 | #define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) | 420 | #define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) |
333 | #define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | 421 | #define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) |
334 | #define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | 422 | #define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) |
423 | #define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) | ||
424 | #define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | ||
425 | #define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | ||
426 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
427 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
335 | 428 | ||
336 | #ifndef CONFIG_M386 | 429 | #ifndef CONFIG_M386 |
337 | #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) | 430 | #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) |
338 | #define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) | 431 | #define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) |
339 | #define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) | 432 | #define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) |
433 | #define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
434 | #define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
435 | #define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
436 | |||
340 | #define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) | 437 | #define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) |
341 | #define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) | 438 | #define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) |
342 | #define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) | 439 | #define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) |
343 | #endif | 440 | #define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
441 | #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
442 | #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
443 | |||
444 | #define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
445 | #define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
446 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
447 | #endif /* !CONFIG_M386 */ | ||
448 | |||
344 | /* | 449 | /* |
345 | * Per cpu atomic 64 bit operations are only available under 64 bit. | 450 | * Per cpu atomic 64 bit operations are only available under 64 bit. |
346 | * 32 bit must fall back to generic operations. | 451 | * 32 bit must fall back to generic operations. |