diff options
| author | Christoph Lameter <cl@linux.com> | 2011-02-28 05:02:24 -0500 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2011-02-28 05:20:49 -0500 |
| commit | b9ec40af0e18fb7d02106be148036c2ea490fdf9 (patch) | |
| tree | 6e9ad8aab6303c40a17467b52c0ac29b3a352d83 | |
| parent | 7c3343392172ba98d9d90a83edcc4c2e80897009 (diff) | |
percpu, x86: Add arch-specific this_cpu_cmpxchg_double() support
Support this_cpu_cmpxchg_double() using the cmpxchg16b and cmpxchg8b
instructions.
-tj: s/percpu_cmpxchg16b/percpu_cmpxchg16b_double/ for consistency and
other cosmetic changes.
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
| -rw-r--r-- | arch/x86/include/asm/percpu.h | 48 | ||||
| -rw-r--r-- | arch/x86/lib/Makefile | 1 | ||||
| -rw-r--r-- | arch/x86/lib/cmpxchg16b_emu.S | 59 |
3 files changed, 108 insertions, 0 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 3788f4649db..260ac7af1fd 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
| @@ -451,6 +451,26 @@ do { \ | |||
| 451 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 451 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
| 452 | #endif /* !CONFIG_M386 */ | 452 | #endif /* !CONFIG_M386 */ |
| 453 | 453 | ||
| 454 | #ifdef CONFIG_X86_CMPXCHG64 | ||
| 455 | #define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ | ||
| 456 | ({ \ | ||
| 457 | char __ret; \ | ||
| 458 | typeof(o1) __o1 = o1; \ | ||
| 459 | typeof(o1) __n1 = n1; \ | ||
| 460 | typeof(o2) __o2 = o2; \ | ||
| 461 | typeof(o2) __n2 = n2; \ | ||
| 462 | typeof(o2) __dummy = n2; \ | ||
| 463 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ | ||
| 464 | : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ | ||
| 465 | : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ | ||
| 466 | __ret; \ | ||
| 467 | }) | ||
| 468 | |||
| 469 | #define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
| 470 | #define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
| 471 | #define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
| 472 | #endif /* CONFIG_X86_CMPXCHG64 */ | ||
| 473 | |||
| 454 | /* | 474 | /* |
| 455 | * Per cpu atomic 64 bit operations are only available under 64 bit. | 475 | * Per cpu atomic 64 bit operations are only available under 64 bit. |
| 456 | * 32 bit must fall back to generic operations. | 476 | * 32 bit must fall back to generic operations. |
| @@ -480,6 +500,34 @@ do { \ | |||
| 480 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 500 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
| 481 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | 501 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) |
| 482 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 502 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
| 503 | |||
| 504 | /* | ||
| 505 | * Pretty complex macro to generate cmpxchg16b instruction. The instruction | ||
| 506 | * is not supported on early AMD64 processors so we must be able to emulate | ||
| 507 | * it in software. The address used in the cmpxchg16b instruction must be | ||
| 508 | * aligned to a 16 byte boundary. | ||
| 509 | */ | ||
| 510 | #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ | ||
| 511 | ({ \ | ||
| 512 | char __ret; \ | ||
| 513 | typeof(o1) __o1 = o1; \ | ||
| 514 | typeof(o1) __n1 = n1; \ | ||
| 515 | typeof(o2) __o2 = o2; \ | ||
| 516 | typeof(o2) __n2 = n2; \ | ||
| 517 | typeof(o2) __dummy; \ | ||
| 518 | alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ | ||
| 519 | "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \ | ||
| 520 | X86_FEATURE_CX16, \ | ||
| 521 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ | ||
| 522 | "S" (&pcp1), "b"(__n1), "c"(__n2), \ | ||
| 523 | "a"(__o1), "d"(__o2)); \ | ||
| 524 | __ret; \ | ||
| 525 | }) | ||
| 526 | |||
| 527 | #define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
| 528 | #define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
| 529 | #define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
| 530 | |||
| 483 | #endif | 531 | #endif |
| 484 | 532 | ||
| 485 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ | 533 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index e10cf070ede..f2479f19ddd 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
| @@ -42,4 +42,5 @@ else | |||
| 42 | lib-y += memmove_64.o memset_64.o | 42 | lib-y += memmove_64.o memset_64.o |
| 43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o | 43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o |
| 44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o | 44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o |
| 45 | lib-y += cmpxchg16b_emu.o | ||
| 45 | endif | 46 | endif |
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S new file mode 100644 index 00000000000..3e8b08a6de2 --- /dev/null +++ b/arch/x86/lib/cmpxchg16b_emu.S | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | /* | ||
| 2 | * This program is free software; you can redistribute it and/or | ||
| 3 | * modify it under the terms of the GNU General Public License | ||
| 4 | * as published by the Free Software Foundation; version 2 | ||
| 5 | * of the License. | ||
| 6 | * | ||
| 7 | */ | ||
| 8 | #include <linux/linkage.h> | ||
| 9 | #include <asm/alternative-asm.h> | ||
| 10 | #include <asm/frame.h> | ||
| 11 | #include <asm/dwarf2.h> | ||
| 12 | |||
| 13 | .text | ||
| 14 | |||
| 15 | /* | ||
| 16 | * Inputs: | ||
| 17 | * %rsi : memory location to compare | ||
| 18 | * %rax : low 64 bits of old value | ||
| 19 | * %rdx : high 64 bits of old value | ||
| 20 | * %rbx : low 64 bits of new value | ||
| 21 | * %rcx : high 64 bits of new value | ||
| 22 | * Output: %al set to 1 if operation successful, 0 otherwise | ||
| 23 | */ | ||
| 24 | ENTRY(this_cpu_cmpxchg16b_emu) | ||
| 25 | CFI_STARTPROC | ||
| 26 | |||
| 27 | # | ||
| 28 | # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not | ||
| 29 | # via the ZF. Caller will access %al to get result. | ||
| 30 | # | ||
| 31 | # Note that this is only useful for a cpuops operation. Meaning that we | ||
| 32 | # do *not* have a fully atomic operation but just an operation that is | ||
| 33 | # *atomic* on a single cpu (as provided by the this_cpu_xx class of | ||
| 34 | # macros). | ||
| 35 | # | ||
| 36 | this_cpu_cmpxchg16b_emu: | ||
| 37 | pushf | ||
| 38 | cli | ||
| 39 | |||
| 40 | cmpq %gs:(%rsi), %rax | ||
| 41 | jne not_same | ||
| 42 | cmpq %gs:8(%rsi), %rdx | ||
| 43 | jne not_same | ||
| 44 | |||
| 45 | movq %rbx, %gs:(%rsi) | ||
| 46 | movq %rcx, %gs:8(%rsi) | ||
| 47 | |||
| 48 | popf | ||
| 49 | mov $1, %al | ||
| 50 | ret | ||
| 51 | |||
| 52 | not_same: | ||
| 53 | popf | ||
| 54 | xor %al,%al | ||
| 55 | ret | ||
| 56 | |||
| 57 | CFI_ENDPROC | ||
| 58 | |||
| 59 | ENDPROC(this_cpu_cmpxchg16b_emu) | ||
