diff options
author | Christoph Lameter <cl@linux.com> | 2011-02-28 05:02:24 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-02-28 05:20:49 -0500 |
commit | b9ec40af0e18fb7d02106be148036c2ea490fdf9 (patch) | |
tree | 6e9ad8aab6303c40a17467b52c0ac29b3a352d83 | |
parent | 7c3343392172ba98d9d90a83edcc4c2e80897009 (diff) |
percpu, x86: Add arch-specific this_cpu_cmpxchg_double() support
Support this_cpu_cmpxchg_double() using the cmpxchg16b and cmpxchg8b
instructions.
-tj: s/percpu_cmpxchg16b/percpu_cmpxchg16b_double/ for consistency and
other cosmetic changes.
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- | arch/x86/include/asm/percpu.h | 48 | ||||
-rw-r--r-- | arch/x86/lib/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/lib/cmpxchg16b_emu.S | 59 |
3 files changed, 108 insertions, 0 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 3788f4649db4..260ac7af1fdc 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -451,6 +451,26 @@ do { \ | |||
451 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 451 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
452 | #endif /* !CONFIG_M386 */ | 452 | #endif /* !CONFIG_M386 */ |
453 | 453 | ||
454 | #ifdef CONFIG_X86_CMPXCHG64 | ||
455 | #define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ | ||
456 | ({ \ | ||
457 | char __ret; \ | ||
458 | typeof(o1) __o1 = o1; \ | ||
459 | typeof(o1) __n1 = n1; \ | ||
460 | typeof(o2) __o2 = o2; \ | ||
461 | typeof(o2) __n2 = n2; \ | ||
462 | typeof(o2) __dummy = n2; \ | ||
463 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ | ||
464 | : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ | ||
465 | : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ | ||
466 | __ret; \ | ||
467 | }) | ||
468 | |||
469 | #define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
470 | #define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
471 | #define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
472 | #endif /* CONFIG_X86_CMPXCHG64 */ | ||
473 | |||
454 | /* | 474 | /* |
455 | * Per cpu atomic 64 bit operations are only available under 64 bit. | 475 | * Per cpu atomic 64 bit operations are only available under 64 bit. |
456 | * 32 bit must fall back to generic operations. | 476 | * 32 bit must fall back to generic operations. |
@@ -480,6 +500,34 @@ do { \ | |||
480 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 500 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
481 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | 501 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) |
482 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 502 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
503 | |||
504 | /* | ||
505 | * Pretty complex macro to generate cmpxchg16 instruction. The instruction | ||
506 | * is not supported on early AMD64 processors so we must be able to emulate | ||
507 | * it in software. The address used in the cmpxchg16 instruction must be | ||
508 | * aligned to a 16 byte boundary. | ||
509 | */ | ||
510 | #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ | ||
511 | ({ \ | ||
512 | char __ret; \ | ||
513 | typeof(o1) __o1 = o1; \ | ||
514 | typeof(o1) __n1 = n1; \ | ||
515 | typeof(o2) __o2 = o2; \ | ||
516 | typeof(o2) __n2 = n2; \ | ||
517 | typeof(o2) __dummy; \ | ||
518 | alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ | ||
519 | "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \ | ||
520 | X86_FEATURE_CX16, \ | ||
521 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ | ||
522 | "S" (&pcp1), "b"(__n1), "c"(__n2), \ | ||
523 | "a"(__o1), "d"(__o2)); \ | ||
524 | __ret; \ | ||
525 | }) | ||
526 | |||
527 | #define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
528 | #define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
529 | #define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
530 | |||
483 | #endif | 531 | #endif |
484 | 532 | ||
485 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ | 533 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index e10cf070ede0..f2479f19ddde 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -42,4 +42,5 @@ else | |||
42 | lib-y += memmove_64.o memset_64.o | 42 | lib-y += memmove_64.o memset_64.o |
43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o | 43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o |
44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o | 44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o |
45 | lib-y += cmpxchg16b_emu.o | ||
45 | endif | 46 | endif |
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S new file mode 100644 index 000000000000..3e8b08a6de2b --- /dev/null +++ b/arch/x86/lib/cmpxchg16b_emu.S | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License | ||
4 | * as published by the Free Software Foundation; version 2 | ||
5 | * of the License. | ||
6 | * | ||
7 | */ | ||
8 | #include <linux/linkage.h> | ||
9 | #include <asm/alternative-asm.h> | ||
10 | #include <asm/frame.h> | ||
11 | #include <asm/dwarf2.h> | ||
12 | |||
13 | .text | ||
14 | |||
15 | /* | ||
16 | * Inputs: | ||
17 | * %rsi : memory location to compare | ||
18 | * %rax : low 64 bits of old value | ||
19 | * %rdx : high 64 bits of old value | ||
20 | * %rbx : low 64 bits of new value | ||
21 | * %rcx : high 64 bits of new value | ||
22 | * %al : Operation successful | ||
23 | */ | ||
24 | ENTRY(this_cpu_cmpxchg16b_emu) | ||
25 | CFI_STARTPROC | ||
26 | |||
27 | # | ||
28 | # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not | ||
29 | # via the ZF. Caller will access %al to get result. | ||
30 | # | ||
31 | # Note that this is only useful for a cpuops operation. Meaning that we | ||
32 | # do *not* have a fully atomic operation but just an operation that is | ||
33 | # *atomic* on a single cpu (as provided by the this_cpu_xx class of | ||
34 | # macros). | ||
35 | # | ||
36 | this_cpu_cmpxchg16b_emu: | ||
37 | pushf | ||
38 | cli | ||
39 | |||
40 | cmpq %gs:(%rsi), %rax | ||
41 | jne not_same | ||
42 | cmpq %gs:8(%rsi), %rdx | ||
43 | jne not_same | ||
44 | |||
45 | movq %rbx, %gs:(%rsi) | ||
46 | movq %rcx, %gs:8(%rsi) | ||
47 | |||
48 | popf | ||
49 | mov $1, %al | ||
50 | ret | ||
51 | |||
52 | not_same: | ||
53 | popf | ||
54 | xor %al,%al | ||
55 | ret | ||
56 | |||
57 | CFI_ENDPROC | ||
58 | |||
59 | ENDPROC(this_cpu_cmpxchg16b_emu) | ||