diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-16 11:22:41 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-16 11:22:41 -0400 |
commit | 79d8a8f736151b12129984b1250fd708440e742c (patch) | |
tree | f67eebe2bafca8820955ee9f851985a41fb32e66 /arch/x86 | |
parent | bd2895eeade5f11f3e5906283c630bbdb4b57454 (diff) | |
parent | b9ec40af0e18fb7d02106be148036c2ea490fdf9 (diff) |
Merge branch 'for-2.6.39' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-2.6.39' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
percpu, x86: Add arch-specific this_cpu_cmpxchg_double() support
percpu: Generic support for this_cpu_cmpxchg_double()
alpha: use L1_CACHE_BYTES for cacheline size in the linker script
percpu: align percpu readmostly subsection to cacheline
Fix up trivial conflict in arch/x86/kernel/vmlinux.lds.S due to the
percpu alignment having changed ("x86: Reduce back the alignment of the
per-CPU data section")
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/percpu.h | 48 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 4 | ||||
-rw-r--r-- | arch/x86/lib/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/lib/cmpxchg16b_emu.S | 59 |
4 files changed, 110 insertions, 2 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 7e172955ee57..a09e1f052d84 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -451,6 +451,26 @@ do { \ | |||
451 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 451 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
452 | #endif /* !CONFIG_M386 */ | 452 | #endif /* !CONFIG_M386 */ |
453 | 453 | ||
454 | #ifdef CONFIG_X86_CMPXCHG64 | ||
455 | #define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ | ||
456 | ({ \ | ||
457 | char __ret; \ | ||
458 | typeof(o1) __o1 = o1; \ | ||
459 | typeof(o1) __n1 = n1; \ | ||
460 | typeof(o2) __o2 = o2; \ | ||
461 | typeof(o2) __n2 = n2; \ | ||
462 | typeof(o2) __dummy = n2; \ | ||
463 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ | ||
464 | : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ | ||
465 | : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ | ||
466 | __ret; \ | ||
467 | }) | ||
468 | |||
469 | #define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
470 | #define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
471 | #define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
472 | #endif /* CONFIG_X86_CMPXCHG64 */ | ||
473 | |||
454 | /* | 474 | /* |
455 | * Per cpu atomic 64 bit operations are only available under 64 bit. | 475 | * Per cpu atomic 64 bit operations are only available under 64 bit. |
456 | * 32 bit must fall back to generic operations. | 476 | * 32 bit must fall back to generic operations. |
@@ -480,6 +500,34 @@ do { \ | |||
480 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 500 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
481 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | 501 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) |
482 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 502 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
503 | |||
504 | /* | ||
505 | * Pretty complex macro to generate cmpxchg16 instruction. The instruction | ||
506 | * is not supported on early AMD64 processors so we must be able to emulate | ||
507 | * it in software. The address used in the cmpxchg16 instruction must be | ||
508 | * aligned to a 16 byte boundary. | ||
509 | */ | ||
510 | #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ | ||
511 | ({ \ | ||
512 | char __ret; \ | ||
513 | typeof(o1) __o1 = o1; \ | ||
514 | typeof(o1) __n1 = n1; \ | ||
515 | typeof(o2) __o2 = o2; \ | ||
516 | typeof(o2) __n2 = n2; \ | ||
517 | typeof(o2) __dummy; \ | ||
518 | alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ | ||
519 | "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \ | ||
520 | X86_FEATURE_CX16, \ | ||
521 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ | ||
522 | "S" (&pcp1), "b"(__n1), "c"(__n2), \ | ||
523 | "a"(__o1), "d"(__o2)); \ | ||
524 | __ret; \ | ||
525 | }) | ||
526 | |||
527 | #define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
528 | #define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
529 | #define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
530 | |||
483 | #endif | 531 | #endif |
484 | 532 | ||
485 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ | 533 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0381e1f3baed..101c32309f17 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -231,7 +231,7 @@ SECTIONS | |||
231 | * output PHDR, so the next output section - .init.text - should | 231 | * output PHDR, so the next output section - .init.text - should |
232 | * start another segment - init. | 232 | * start another segment - init. |
233 | */ | 233 | */ |
234 | PERCPU_VADDR(0, :percpu) | 234 | PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) |
235 | #endif | 235 | #endif |
236 | 236 | ||
237 | INIT_TEXT_SECTION(PAGE_SIZE) | 237 | INIT_TEXT_SECTION(PAGE_SIZE) |
@@ -306,7 +306,7 @@ SECTIONS | |||
306 | } | 306 | } |
307 | 307 | ||
308 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) | 308 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) |
309 | PERCPU(PAGE_SIZE) | 309 | PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE) |
310 | #endif | 310 | #endif |
311 | 311 | ||
312 | . = ALIGN(PAGE_SIZE); | 312 | . = ALIGN(PAGE_SIZE); |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index e10cf070ede0..f2479f19ddde 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -42,4 +42,5 @@ else | |||
42 | lib-y += memmove_64.o memset_64.o | 42 | lib-y += memmove_64.o memset_64.o |
43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o | 43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o |
44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o | 44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o |
45 | lib-y += cmpxchg16b_emu.o | ||
45 | endif | 46 | endif |
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S new file mode 100644 index 000000000000..3e8b08a6de2b --- /dev/null +++ b/arch/x86/lib/cmpxchg16b_emu.S | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License | ||
4 | * as published by the Free Software Foundation; version 2 | ||
5 | * of the License. | ||
6 | * | ||
7 | */ | ||
8 | #include <linux/linkage.h> | ||
9 | #include <asm/alternative-asm.h> | ||
10 | #include <asm/frame.h> | ||
11 | #include <asm/dwarf2.h> | ||
12 | |||
13 | .text | ||
14 | |||
15 | /* | ||
16 | * Inputs: | ||
17 | * %rsi : memory location to compare | ||
18 | * %rax : low 64 bits of old value | ||
19 | * %rdx : high 64 bits of old value | ||
20 | * %rbx : low 64 bits of new value | ||
21 | * %rcx : high 64 bits of new value | ||
22 | * %al : Operation successful | ||
23 | */ | ||
24 | ENTRY(this_cpu_cmpxchg16b_emu) | ||
25 | CFI_STARTPROC | ||
26 | |||
27 | # | ||
28 | # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not | ||
29 | # via the ZF. Caller will access %al to get result. | ||
30 | # | ||
31 | # Note that this is only useful for a cpuops operation. Meaning that we | ||
32 | # do *not* have a fully atomic operation but just an operation that is | ||
33 | # *atomic* on a single cpu (as provided by the this_cpu_xx class of | ||
34 | # macros). | ||
35 | # | ||
36 | this_cpu_cmpxchg16b_emu: | ||
37 | pushf | ||
38 | cli | ||
39 | |||
40 | cmpq %gs:(%rsi), %rax | ||
41 | jne not_same | ||
42 | cmpq %gs:8(%rsi), %rdx | ||
43 | jne not_same | ||
44 | |||
45 | movq %rbx, %gs:(%rsi) | ||
46 | movq %rcx, %gs:8(%rsi) | ||
47 | |||
48 | popf | ||
49 | mov $1, %al | ||
50 | ret | ||
51 | |||
52 | not_same: | ||
53 | popf | ||
54 | xor %al,%al | ||
55 | ret | ||
56 | |||
57 | CFI_ENDPROC | ||
58 | |||
59 | ENDPROC(this_cpu_cmpxchg16b_emu) | ||