aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Beulich <JBeulich@suse.com>2011-12-14 03:33:25 -0500
committerIngo Molnar <mingo@elte.hu>2011-12-15 02:17:14 -0500
commitcebef5beed3de3037de85a521495897256b2c5da (patch)
tree0deb5a368bde1850caf8311e9fba56e4a711b930
parent969df4b82904a30fef19a67398a0c854d223ea67 (diff)
x86: Fix and improve percpu_cmpxchg{8,16}b_double()
They had several problems/shortcomings: Only the first memory operand was mentioned in the 2x32-bit asm() operands, and the 2x64-bit version had a memory clobber. The first allowed the compiler to not recognize the need to re-load the data in case it had it cached in some register, and the second was overly destructive. The memory operand in the 2x32-bit asm() was declared to only be an output. The types of the local copies of the old and new values were incorrect (as in other per-CPU ops, the types of the per-CPU variables accessed should be used here, to make sure the respective types are compatible). The __dummy variable was pointless (and needlessly initialized in the 2x32-bit case), given that local copies of the inputs already exist. The 2x64-bit variant forced the address of the first object into %rsi, even though this is needed only for the call to the emulation function. The real cmpxchg16b can operate on any memory. At once also change the return value type to what it really is - 'bool'. Signed-off-by: Jan Beulich <jbeulich@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: David Howells <dhowells@redhat.com> Cc: Christoph Lameter <cl@linux.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/4EE86D6502000078000679FE@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/include/asm/percpu.h53
1 files changed, 21 insertions, 32 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3470c9d0ebba..529bf07e8067 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,23 +451,20 @@ do { \
451#endif /* !CONFIG_M386 */ 451#endif /* !CONFIG_M386 */
452 452
453#ifdef CONFIG_X86_CMPXCHG64 453#ifdef CONFIG_X86_CMPXCHG64
454#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ 454#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
455({ \ 455({ \
456 char __ret; \ 456 bool __ret; \
457 typeof(o1) __o1 = o1; \ 457 typeof(pcp1) __o1 = (o1), __n1 = (n1); \
458 typeof(o1) __n1 = n1; \ 458 typeof(pcp2) __o2 = (o2), __n2 = (n2); \
459 typeof(o2) __o2 = o2; \
460 typeof(o2) __n2 = n2; \
461 typeof(o2) __dummy = n2; \
462 asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ 459 asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \
463 : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ 460 : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
464 : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ 461 : "b" (__n1), "c" (__n2), "a" (__o1)); \
465 __ret; \ 462 __ret; \
466}) 463})
467 464
468#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 465#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
469#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 466#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
470#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 467#define irqsafe_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
471#endif /* CONFIG_X86_CMPXCHG64 */ 468#endif /* CONFIG_X86_CMPXCHG64 */
472 469
473/* 470/*
@@ -508,31 +505,23 @@ do { \
508 * it in software. The address used in the cmpxchg16 instruction must be 505 * it in software. The address used in the cmpxchg16 instruction must be
509 * aligned to a 16 byte boundary. 506 * aligned to a 16 byte boundary.
510 */ 507 */
511#ifdef CONFIG_SMP 508#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \
512#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3
513#else
514#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2
515#endif
516#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \
517({ \ 509({ \
518 char __ret; \ 510 bool __ret; \
519 typeof(o1) __o1 = o1; \ 511 typeof(pcp1) __o1 = (o1), __n1 = (n1); \
520 typeof(o1) __n1 = n1; \ 512 typeof(pcp2) __o2 = (o2), __n2 = (n2); \
521 typeof(o2) __o2 = o2; \ 513 alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
522 typeof(o2) __n2 = n2; \ 514 "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
523 typeof(o2) __dummy; \
524 alternative_io(CMPXCHG16B_EMU_CALL, \
525 "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \
526 X86_FEATURE_CX16, \ 515 X86_FEATURE_CX16, \
527 ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ 516 ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \
528 "S" (&pcp1), "b"(__n1), "c"(__n2), \ 517 "+m" (pcp2), "+d" (__o2)), \
529 "a"(__o1), "d"(__o2) : "memory"); \ 518 "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \
530 __ret; \ 519 __ret; \
531}) 520})
532 521
533#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 522#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
534#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 523#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
535#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 524#define irqsafe_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
536 525
537#endif 526#endif
538 527