diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-16 11:22:41 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-16 11:22:41 -0400 |
commit | 79d8a8f736151b12129984b1250fd708440e742c (patch) | |
tree | f67eebe2bafca8820955ee9f851985a41fb32e66 | |
parent | bd2895eeade5f11f3e5906283c630bbdb4b57454 (diff) | |
parent | b9ec40af0e18fb7d02106be148036c2ea490fdf9 (diff) |
Merge branch 'for-2.6.39' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-2.6.39' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
percpu, x86: Add arch-specific this_cpu_cmpxchg_double() support
percpu: Generic support for this_cpu_cmpxchg_double()
alpha: use L1_CACHE_BYTES for cacheline size in the linker script
percpu: align percpu readmostly subsection to cacheline
Fix up trivial conflict in arch/x86/kernel/vmlinux.lds.S due to the
percpu alignment having changed ("x86: Reduce back the alignment of the
per-CPU data section")
-rw-r--r-- | arch/alpha/kernel/vmlinux.lds.S | 5 | ||||
-rw-r--r-- | arch/arm/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/blackfin/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/cris/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/frv/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/m32r/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/mips/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/mn10300/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/parisc/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/s390/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/sh/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/sparc/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/tile/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/um/include/asm/common.lds.S | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/percpu.h | 48 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 4 | ||||
-rw-r--r-- | arch/x86/lib/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/lib/cmpxchg16b_emu.S | 59 | ||||
-rw-r--r-- | arch/xtensa/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | include/asm-generic/vmlinux.lds.h | 35 | ||||
-rw-r--r-- | include/linux/percpu.h | 128 |
23 files changed, 279 insertions, 33 deletions
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 003ef4c02585..433be2a24f31 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <asm-generic/vmlinux.lds.h> | 1 | #include <asm-generic/vmlinux.lds.h> |
2 | #include <asm/thread_info.h> | 2 | #include <asm/thread_info.h> |
3 | #include <asm/cache.h> | ||
3 | #include <asm/page.h> | 4 | #include <asm/page.h> |
4 | 5 | ||
5 | OUTPUT_FORMAT("elf64-alpha") | 6 | OUTPUT_FORMAT("elf64-alpha") |
@@ -38,7 +39,7 @@ SECTIONS | |||
38 | __init_begin = ALIGN(PAGE_SIZE); | 39 | __init_begin = ALIGN(PAGE_SIZE); |
39 | INIT_TEXT_SECTION(PAGE_SIZE) | 40 | INIT_TEXT_SECTION(PAGE_SIZE) |
40 | INIT_DATA_SECTION(16) | 41 | INIT_DATA_SECTION(16) |
41 | PERCPU(PAGE_SIZE) | 42 | PERCPU(L1_CACHE_BYTES, PAGE_SIZE) |
42 | /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page | 43 | /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page |
43 | needed for the THREAD_SIZE aligned init_task gets freed after init */ | 44 | needed for the THREAD_SIZE aligned init_task gets freed after init */ |
44 | . = ALIGN(THREAD_SIZE); | 45 | . = ALIGN(THREAD_SIZE); |
@@ -46,7 +47,7 @@ SECTIONS | |||
46 | /* Freed after init ends here */ | 47 | /* Freed after init ends here */ |
47 | 48 | ||
48 | _data = .; | 49 | _data = .; |
49 | RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) | 50 | RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) |
50 | 51 | ||
51 | .got : { | 52 | .got : { |
52 | *(.got) | 53 | *(.got) |
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 61462790757f..28fea9b2d129 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S | |||
@@ -78,7 +78,7 @@ SECTIONS | |||
78 | #endif | 78 | #endif |
79 | } | 79 | } |
80 | 80 | ||
81 | PERCPU(PAGE_SIZE) | 81 | PERCPU(32, PAGE_SIZE) |
82 | 82 | ||
83 | #ifndef CONFIG_XIP_KERNEL | 83 | #ifndef CONFIG_XIP_KERNEL |
84 | . = ALIGN(PAGE_SIZE); | 84 | . = ALIGN(PAGE_SIZE); |
diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 4122678529c0..c40d07f708e8 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S | |||
@@ -136,7 +136,7 @@ SECTIONS | |||
136 | 136 | ||
137 | . = ALIGN(16); | 137 | . = ALIGN(16); |
138 | INIT_DATA_SECTION(16) | 138 | INIT_DATA_SECTION(16) |
139 | PERCPU(4) | 139 | PERCPU(32, 4) |
140 | 140 | ||
141 | .exit.data : | 141 | .exit.data : |
142 | { | 142 | { |
diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index c49be845f96a..728bbd9e7d4c 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S | |||
@@ -102,7 +102,7 @@ SECTIONS | |||
102 | #endif | 102 | #endif |
103 | __vmlinux_end = .; /* Last address of the physical file. */ | 103 | __vmlinux_end = .; /* Last address of the physical file. */ |
104 | #ifdef CONFIG_ETRAX_ARCH_V32 | 104 | #ifdef CONFIG_ETRAX_ARCH_V32 |
105 | PERCPU(PAGE_SIZE) | 105 | PERCPU(32, PAGE_SIZE) |
106 | 106 | ||
107 | .init.ramfs : { | 107 | .init.ramfs : { |
108 | INIT_RAM_FS | 108 | INIT_RAM_FS |
diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 8b973f3cc90e..0daae8af5787 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S | |||
@@ -37,7 +37,7 @@ SECTIONS | |||
37 | _einittext = .; | 37 | _einittext = .; |
38 | 38 | ||
39 | INIT_DATA_SECTION(8) | 39 | INIT_DATA_SECTION(8) |
40 | PERCPU(4096) | 40 | PERCPU(L1_CACHE_BYTES, 4096) |
41 | 41 | ||
42 | . = ALIGN(PAGE_SIZE); | 42 | . = ALIGN(PAGE_SIZE); |
43 | __init_end = .; | 43 | __init_end = .; |
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 5a4d044dcb1c..787de4a77d82 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S | |||
@@ -198,7 +198,7 @@ SECTIONS { | |||
198 | 198 | ||
199 | /* Per-cpu data: */ | 199 | /* Per-cpu data: */ |
200 | . = ALIGN(PERCPU_PAGE_SIZE); | 200 | . = ALIGN(PERCPU_PAGE_SIZE); |
201 | PERCPU_VADDR(PERCPU_ADDR, :percpu) | 201 | PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu) |
202 | __phys_per_cpu_start = __per_cpu_load; | 202 | __phys_per_cpu_start = __per_cpu_load; |
203 | /* | 203 | /* |
204 | * ensure percpu data fits | 204 | * ensure percpu data fits |
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 7da94eaa082b..c194d64cdbb9 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S | |||
@@ -53,7 +53,7 @@ SECTIONS | |||
53 | __init_begin = .; | 53 | __init_begin = .; |
54 | INIT_TEXT_SECTION(PAGE_SIZE) | 54 | INIT_TEXT_SECTION(PAGE_SIZE) |
55 | INIT_DATA_SECTION(16) | 55 | INIT_DATA_SECTION(16) |
56 | PERCPU(PAGE_SIZE) | 56 | PERCPU(32, PAGE_SIZE) |
57 | . = ALIGN(PAGE_SIZE); | 57 | . = ALIGN(PAGE_SIZE); |
58 | __init_end = .; | 58 | __init_end = .; |
59 | /* freed after init ends here */ | 59 | /* freed after init ends here */ |
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 570607b376b5..832afbb87588 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S | |||
@@ -115,7 +115,7 @@ SECTIONS | |||
115 | EXIT_DATA | 115 | EXIT_DATA |
116 | } | 116 | } |
117 | 117 | ||
118 | PERCPU(PAGE_SIZE) | 118 | PERCPU(1 << CONFIG_MIPS_L1_CACHE_SHIFT, PAGE_SIZE) |
119 | . = ALIGN(PAGE_SIZE); | 119 | . = ALIGN(PAGE_SIZE); |
120 | __init_end = .; | 120 | __init_end = .; |
121 | /* freed after init ends here */ | 121 | /* freed after init ends here */ |
diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index febbeee7f2f5..968bcd2cb022 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S | |||
@@ -70,7 +70,7 @@ SECTIONS | |||
70 | .exit.text : { EXIT_TEXT; } | 70 | .exit.text : { EXIT_TEXT; } |
71 | .exit.data : { EXIT_DATA; } | 71 | .exit.data : { EXIT_DATA; } |
72 | 72 | ||
73 | PERCPU(PAGE_SIZE) | 73 | PERCPU(32, PAGE_SIZE) |
74 | . = ALIGN(PAGE_SIZE); | 74 | . = ALIGN(PAGE_SIZE); |
75 | __init_end = .; | 75 | __init_end = .; |
76 | /* freed after init ends here */ | 76 | /* freed after init ends here */ |
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index d64a6bbec2aa..8f1e4efd143e 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S | |||
@@ -145,7 +145,7 @@ SECTIONS | |||
145 | EXIT_DATA | 145 | EXIT_DATA |
146 | } | 146 | } |
147 | 147 | ||
148 | PERCPU(PAGE_SIZE) | 148 | PERCPU(L1_CACHE_BYTES, PAGE_SIZE) |
149 | . = ALIGN(PAGE_SIZE); | 149 | . = ALIGN(PAGE_SIZE); |
150 | __init_end = .; | 150 | __init_end = .; |
151 | /* freed after init ends here */ | 151 | /* freed after init ends here */ |
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8a0deefac08d..b9150f07d266 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S | |||
@@ -160,7 +160,7 @@ SECTIONS | |||
160 | INIT_RAM_FS | 160 | INIT_RAM_FS |
161 | } | 161 | } |
162 | 162 | ||
163 | PERCPU(PAGE_SIZE) | 163 | PERCPU(L1_CACHE_BYTES, PAGE_SIZE) |
164 | 164 | ||
165 | . = ALIGN(8); | 165 | . = ALIGN(8); |
166 | .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { | 166 | .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { |
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a68ac10213b2..1bc18cdb525b 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S | |||
@@ -77,7 +77,7 @@ SECTIONS | |||
77 | . = ALIGN(PAGE_SIZE); | 77 | . = ALIGN(PAGE_SIZE); |
78 | INIT_DATA_SECTION(0x100) | 78 | INIT_DATA_SECTION(0x100) |
79 | 79 | ||
80 | PERCPU(PAGE_SIZE) | 80 | PERCPU(0x100, PAGE_SIZE) |
81 | . = ALIGN(PAGE_SIZE); | 81 | . = ALIGN(PAGE_SIZE); |
82 | __init_end = .; /* freed after init ends here */ | 82 | __init_end = .; /* freed after init ends here */ |
83 | 83 | ||
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 7f8a709c3ada..af4d46187a79 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S | |||
@@ -66,7 +66,7 @@ SECTIONS | |||
66 | __machvec_end = .; | 66 | __machvec_end = .; |
67 | } | 67 | } |
68 | 68 | ||
69 | PERCPU(PAGE_SIZE) | 69 | PERCPU(L1_CACHE_BYTES, PAGE_SIZE) |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * .exit.text is discarded at runtime, not link time, to deal with | 72 | * .exit.text is discarded at runtime, not link time, to deal with |
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 0c1e6783657f..92b557afe535 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S | |||
@@ -108,7 +108,7 @@ SECTIONS | |||
108 | __sun4v_2insn_patch_end = .; | 108 | __sun4v_2insn_patch_end = .; |
109 | } | 109 | } |
110 | 110 | ||
111 | PERCPU(PAGE_SIZE) | 111 | PERCPU(SMP_CACHE_BYTES, PAGE_SIZE) |
112 | 112 | ||
113 | . = ALIGN(PAGE_SIZE); | 113 | . = ALIGN(PAGE_SIZE); |
114 | __init_end = .; | 114 | __init_end = .; |
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 25fdc0c1839a..c6ce378e0678 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S | |||
@@ -63,7 +63,7 @@ SECTIONS | |||
63 | *(.init.page) | 63 | *(.init.page) |
64 | } :data =0 | 64 | } :data =0 |
65 | INIT_DATA_SECTION(16) | 65 | INIT_DATA_SECTION(16) |
66 | PERCPU(PAGE_SIZE) | 66 | PERCPU(L2_CACHE_BYTES, PAGE_SIZE) |
67 | . = ALIGN(PAGE_SIZE); | 67 | . = ALIGN(PAGE_SIZE); |
68 | VMLINUX_SYMBOL(_einitdata) = .; | 68 | VMLINUX_SYMBOL(_einitdata) = .; |
69 | 69 | ||
diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index ac55b9efa1ce..34bede8aad4a 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S | |||
@@ -42,7 +42,7 @@ | |||
42 | INIT_SETUP(0) | 42 | INIT_SETUP(0) |
43 | } | 43 | } |
44 | 44 | ||
45 | PERCPU(32) | 45 | PERCPU(32, 32) |
46 | 46 | ||
47 | .initcall.init : { | 47 | .initcall.init : { |
48 | INIT_CALLS | 48 | INIT_CALLS |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 7e172955ee57..a09e1f052d84 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -451,6 +451,26 @@ do { \ | |||
451 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 451 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
452 | #endif /* !CONFIG_M386 */ | 452 | #endif /* !CONFIG_M386 */ |
453 | 453 | ||
454 | #ifdef CONFIG_X86_CMPXCHG64 | ||
455 | #define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ | ||
456 | ({ \ | ||
457 | char __ret; \ | ||
458 | typeof(o1) __o1 = o1; \ | ||
459 | typeof(o1) __n1 = n1; \ | ||
460 | typeof(o2) __o2 = o2; \ | ||
461 | typeof(o2) __n2 = n2; \ | ||
462 | typeof(o2) __dummy = n2; \ | ||
463 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ | ||
464 | : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ | ||
465 | : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ | ||
466 | __ret; \ | ||
467 | }) | ||
468 | |||
469 | #define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
470 | #define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
471 | #define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
472 | #endif /* CONFIG_X86_CMPXCHG64 */ | ||
473 | |||
454 | /* | 474 | /* |
455 | * Per cpu atomic 64 bit operations are only available under 64 bit. | 475 | * Per cpu atomic 64 bit operations are only available under 64 bit. |
456 | * 32 bit must fall back to generic operations. | 476 | * 32 bit must fall back to generic operations. |
@@ -480,6 +500,34 @@ do { \ | |||
480 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 500 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
481 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | 501 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) |
482 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 502 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
503 | |||
504 | /* | ||
505 | * Pretty complex macro to generate cmpxchg16b instruction. The instruction | ||
506 | * is not supported on early AMD64 processors so we must be able to emulate | ||
507 | * it in software. The address used in the cmpxchg16b instruction must be | ||
508 | * aligned to a 16 byte boundary. | ||
509 | */ | ||
510 | #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ | ||
511 | ({ \ | ||
512 | char __ret; \ | ||
513 | typeof(o1) __o1 = o1; \ | ||
514 | typeof(o1) __n1 = n1; \ | ||
515 | typeof(o2) __o2 = o2; \ | ||
516 | typeof(o2) __n2 = n2; \ | ||
517 | typeof(o2) __dummy; \ | ||
518 | alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ | ||
519 | "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \ | ||
520 | X86_FEATURE_CX16, \ | ||
521 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ | ||
522 | "S" (&pcp1), "b"(__n1), "c"(__n2), \ | ||
523 | "a"(__o1), "d"(__o2)); \ | ||
524 | __ret; \ | ||
525 | }) | ||
526 | |||
527 | #define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
528 | #define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
529 | #define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
530 | |||
483 | #endif | 531 | #endif |
484 | 532 | ||
485 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ | 533 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0381e1f3baed..101c32309f17 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -231,7 +231,7 @@ SECTIONS | |||
231 | * output PHDR, so the next output section - .init.text - should | 231 | * output PHDR, so the next output section - .init.text - should |
232 | * start another segment - init. | 232 | * start another segment - init. |
233 | */ | 233 | */ |
234 | PERCPU_VADDR(0, :percpu) | 234 | PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) |
235 | #endif | 235 | #endif |
236 | 236 | ||
237 | INIT_TEXT_SECTION(PAGE_SIZE) | 237 | INIT_TEXT_SECTION(PAGE_SIZE) |
@@ -306,7 +306,7 @@ SECTIONS | |||
306 | } | 306 | } |
307 | 307 | ||
308 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) | 308 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) |
309 | PERCPU(PAGE_SIZE) | 309 | PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE) |
310 | #endif | 310 | #endif |
311 | 311 | ||
312 | . = ALIGN(PAGE_SIZE); | 312 | . = ALIGN(PAGE_SIZE); |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index e10cf070ede0..f2479f19ddde 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -42,4 +42,5 @@ else | |||
42 | lib-y += memmove_64.o memset_64.o | 42 | lib-y += memmove_64.o memset_64.o |
43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o | 43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o |
44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o | 44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o |
45 | lib-y += cmpxchg16b_emu.o | ||
45 | endif | 46 | endif |
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S new file mode 100644 index 000000000000..3e8b08a6de2b --- /dev/null +++ b/arch/x86/lib/cmpxchg16b_emu.S | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License | ||
4 | * as published by the Free Software Foundation; version 2 | ||
5 | * of the License. | ||
6 | * | ||
7 | */ | ||
8 | #include <linux/linkage.h> | ||
9 | #include <asm/alternative-asm.h> | ||
10 | #include <asm/frame.h> | ||
11 | #include <asm/dwarf2.h> | ||
12 | |||
13 | .text | ||
14 | |||
15 | /* | ||
16 | * Inputs: | ||
17 | * %rsi : memory location to compare | ||
18 | * %rax : low 64 bits of old value | ||
19 | * %rdx : high 64 bits of old value | ||
20 | * %rbx : low 64 bits of new value | ||
21 | * %rcx : high 64 bits of new value | ||
22 | * Output: %al is 1 if the 16 bytes were exchanged, 0 if the comparison failed | ||
23 | */ | ||
24 | ENTRY(this_cpu_cmpxchg16b_emu) | ||
25 | CFI_STARTPROC | ||
26 | |||
27 | # | ||
28 | # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not | ||
29 | # via the ZF. Caller will access %al to get result. | ||
30 | # | ||
31 | # Note that this is only useful for a cpuops operation. Meaning that we | ||
32 | # do *not* have a fully atomic operation but just an operation that is | ||
33 | # *atomic* on a single cpu (as provided by the this_cpu_xx class of | ||
34 | # macros). | ||
35 | # | ||
36 | this_cpu_cmpxchg16b_emu: | ||
37 | pushf | ||
38 | cli | ||
39 | |||
40 | cmpq %gs:(%rsi), %rax | ||
41 | jne not_same | ||
42 | cmpq %gs:8(%rsi), %rdx | ||
43 | jne not_same | ||
44 | |||
45 | movq %rbx, %gs:(%rsi) | ||
46 | movq %rcx, %gs:8(%rsi) | ||
47 | |||
48 | popf | ||
49 | mov $1, %al | ||
50 | ret | ||
51 | |||
52 | not_same: | ||
53 | popf | ||
54 | xor %al,%al | ||
55 | ret | ||
56 | |||
57 | CFI_ENDPROC | ||
58 | |||
59 | ENDPROC(this_cpu_cmpxchg16b_emu) | ||
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 9b526154c9ba..a2820065927e 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S | |||
@@ -155,7 +155,7 @@ SECTIONS | |||
155 | INIT_RAM_FS | 155 | INIT_RAM_FS |
156 | } | 156 | } |
157 | 157 | ||
158 | PERCPU(PAGE_SIZE) | 158 | PERCPU(XCHAL_ICACHE_LINESIZE, PAGE_SIZE) |
159 | 159 | ||
160 | /* We need this dummy segment here */ | 160 | /* We need this dummy segment here */ |
161 | 161 | ||
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 906c3ceca9a2..32c45e5fe0ab 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h | |||
@@ -15,7 +15,7 @@ | |||
15 | * HEAD_TEXT_SECTION | 15 | * HEAD_TEXT_SECTION |
16 | * INIT_TEXT_SECTION(PAGE_SIZE) | 16 | * INIT_TEXT_SECTION(PAGE_SIZE) |
17 | * INIT_DATA_SECTION(...) | 17 | * INIT_DATA_SECTION(...) |
18 | * PERCPU(PAGE_SIZE) | 18 | * PERCPU(CACHELINE_SIZE, PAGE_SIZE) |
19 | * __init_end = .; | 19 | * __init_end = .; |
20 | * | 20 | * |
21 | * _stext = .; | 21 | * _stext = .; |
@@ -689,13 +689,18 @@ | |||
689 | 689 | ||
690 | /** | 690 | /** |
691 | * PERCPU_VADDR - define output section for percpu area | 691 | * PERCPU_VADDR - define output section for percpu area |
692 | * @cacheline: cacheline size | ||
692 | * @vaddr: explicit base address (optional) | 693 | * @vaddr: explicit base address (optional) |
693 | * @phdr: destination PHDR (optional) | 694 | * @phdr: destination PHDR (optional) |
694 | * | 695 | * |
695 | * Macro which expands to output section for percpu area. If @vaddr | 696 | * Macro which expands to output section for percpu area. |
696 | * is not blank, it specifies explicit base address and all percpu | 697 | * |
697 | * symbols will be offset from the given address. If blank, @vaddr | 698 | * @cacheline is used to align subsections to avoid false cacheline |
698 | * always equals @laddr + LOAD_OFFSET. | 699 | * sharing between subsections for different purposes. |
700 | * | ||
701 | * If @vaddr is not blank, it specifies explicit base address and all | ||
702 | * percpu symbols will be offset from the given address. If blank, | ||
703 | * @vaddr always equals @laddr + LOAD_OFFSET. | ||
699 | * | 704 | * |
700 | * @phdr defines the output PHDR to use if not blank. Be warned that | 705 | * @phdr defines the output PHDR to use if not blank. Be warned that |
701 | * output PHDR is sticky. If @phdr is specified, the next output | 706 | * output PHDR is sticky. If @phdr is specified, the next output |
@@ -706,7 +711,7 @@ | |||
706 | * If there is no need to put the percpu section at a predetermined | 711 | * If there is no need to put the percpu section at a predetermined |
707 | * address, use PERCPU(). | 712 | * address, use PERCPU(). |
708 | */ | 713 | */ |
709 | #define PERCPU_VADDR(vaddr, phdr) \ | 714 | #define PERCPU_VADDR(cacheline, vaddr, phdr) \ |
710 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ | 715 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ |
711 | .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ | 716 | .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ |
712 | - LOAD_OFFSET) { \ | 717 | - LOAD_OFFSET) { \ |
@@ -714,7 +719,9 @@ | |||
714 | *(.data..percpu..first) \ | 719 | *(.data..percpu..first) \ |
715 | . = ALIGN(PAGE_SIZE); \ | 720 | . = ALIGN(PAGE_SIZE); \ |
716 | *(.data..percpu..page_aligned) \ | 721 | *(.data..percpu..page_aligned) \ |
722 | . = ALIGN(cacheline); \ | ||
717 | *(.data..percpu..readmostly) \ | 723 | *(.data..percpu..readmostly) \ |
724 | . = ALIGN(cacheline); \ | ||
718 | *(.data..percpu) \ | 725 | *(.data..percpu) \ |
719 | *(.data..percpu..shared_aligned) \ | 726 | *(.data..percpu..shared_aligned) \ |
720 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ | 727 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ |
@@ -723,18 +730,18 @@ | |||
723 | 730 | ||
724 | /** | 731 | /** |
725 | * PERCPU - define output section for percpu area, simple version | 732 | * PERCPU - define output section for percpu area, simple version |
733 | * @cacheline: cacheline size | ||
726 | * @align: required alignment | 734 | * @align: required alignment |
727 | * | 735 | * |
728 | * Align to @align and outputs output section for percpu area. This | 736 | * Align to @align and outputs output section for percpu area. This macro |
729 | * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and | 737 | * doesn't manipulate @vaddr or @phdr and __per_cpu_load and |
730 | * __per_cpu_start will be identical. | 738 | * __per_cpu_start will be identical. |
731 | * | 739 | * |
732 | * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except | 740 | * This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,) |
733 | * that __per_cpu_load is defined as a relative symbol against | 741 | * except that __per_cpu_load is defined as a relative symbol against |
734 | * .data..percpu which is required for relocatable x86_32 | 742 | * .data..percpu which is required for relocatable x86_32 configuration. |
735 | * configuration. | ||
736 | */ | 743 | */ |
737 | #define PERCPU(align) \ | 744 | #define PERCPU(cacheline, align) \ |
738 | . = ALIGN(align); \ | 745 | . = ALIGN(align); \ |
739 | .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ | 746 | .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ |
740 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ | 747 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ |
@@ -742,7 +749,9 @@ | |||
742 | *(.data..percpu..first) \ | 749 | *(.data..percpu..first) \ |
743 | . = ALIGN(PAGE_SIZE); \ | 750 | . = ALIGN(PAGE_SIZE); \ |
744 | *(.data..percpu..page_aligned) \ | 751 | *(.data..percpu..page_aligned) \ |
752 | . = ALIGN(cacheline); \ | ||
745 | *(.data..percpu..readmostly) \ | 753 | *(.data..percpu..readmostly) \ |
754 | . = ALIGN(cacheline); \ | ||
746 | *(.data..percpu) \ | 755 | *(.data..percpu) \ |
747 | *(.data..percpu..shared_aligned) \ | 756 | *(.data..percpu..shared_aligned) \ |
748 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ | 757 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ |
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 27c3c6fcfad3..3a5c4449fd36 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h | |||
@@ -255,6 +255,30 @@ extern void __bad_size_call_parameter(void); | |||
255 | pscr2_ret__; \ | 255 | pscr2_ret__; \ |
256 | }) | 256 | }) |
257 | 257 | ||
258 | /* | ||
259 | * Special handling for cmpxchg_double. cmpxchg_double is passed two | ||
260 | * percpu variables. The first has to be aligned to a double word | ||
261 | * boundary and the second has to follow directly thereafter. | ||
262 | */ | ||
263 | #define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ | ||
264 | ({ \ | ||
265 | bool pdcrb_ret__; \ | ||
266 | __verify_pcpu_ptr(&pcp1); \ | ||
267 | BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \ | ||
268 | VM_BUG_ON((unsigned long)(&pcp1) % (2 * sizeof(pcp1))); \ | ||
269 | VM_BUG_ON((unsigned long)(&pcp2) != \ | ||
270 | (unsigned long)(&pcp1) + sizeof(pcp1)); \ | ||
271 | switch(sizeof(pcp1)) { \ | ||
272 | case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \ | ||
273 | case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \ | ||
274 | case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \ | ||
275 | case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \ | ||
276 | default: \ | ||
277 | __bad_size_call_parameter(); break; \ | ||
278 | } \ | ||
279 | pdcrb_ret__; \ | ||
280 | }) | ||
281 | |||
258 | #define __pcpu_size_call(stem, variable, ...) \ | 282 | #define __pcpu_size_call(stem, variable, ...) \ |
259 | do { \ | 283 | do { \ |
260 | __verify_pcpu_ptr(&(variable)); \ | 284 | __verify_pcpu_ptr(&(variable)); \ |
@@ -501,6 +525,45 @@ do { \ | |||
501 | #endif | 525 | #endif |
502 | 526 | ||
503 | /* | 527 | /* |
528 | * cmpxchg_double replaces two adjacent scalars at once. The first | ||
529 | * two parameters are per cpu variables which have to be of the same | ||
530 | * size. A truth value is returned to indicate success or failure | ||
531 | * (since a double register result is difficult to handle). There is | ||
532 | * very limited hardware support for these operations, so only certain | ||
533 | * sizes may work. | ||
534 | */ | ||
535 | #define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
536 | ({ \ | ||
537 | int ret__; \ | ||
538 | preempt_disable(); \ | ||
539 | ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \ | ||
540 | oval1, oval2, nval1, nval2); \ | ||
541 | preempt_enable(); \ | ||
542 | ret__; \ | ||
543 | }) | ||
544 | |||
545 | #ifndef this_cpu_cmpxchg_double | ||
546 | # ifndef this_cpu_cmpxchg_double_1 | ||
547 | # define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
548 | _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
549 | # endif | ||
550 | # ifndef this_cpu_cmpxchg_double_2 | ||
551 | # define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
552 | _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
553 | # endif | ||
554 | # ifndef this_cpu_cmpxchg_double_4 | ||
555 | # define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
556 | _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
557 | # endif | ||
558 | # ifndef this_cpu_cmpxchg_double_8 | ||
559 | # define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
560 | _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
561 | # endif | ||
562 | # define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
563 | __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) | ||
564 | #endif | ||
565 | |||
566 | /* | ||
504 | * Generic percpu operations that do not require preemption handling. | 567 | * Generic percpu operations that do not require preemption handling. |
505 | * Either we do not care about races or the caller has the | 568 | * Either we do not care about races or the caller has the |
506 | * responsibility of handling preemptions issues. Arch code can still | 569 | * responsibility of handling preemptions issues. Arch code can still |
@@ -703,6 +766,39 @@ do { \ | |||
703 | __pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval) | 766 | __pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval) |
704 | #endif | 767 | #endif |
705 | 768 | ||
769 | #define __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
770 | ({ \ | ||
771 | int __ret = 0; \ | ||
772 | if (__this_cpu_read(pcp1) == (oval1) && \ | ||
773 | __this_cpu_read(pcp2) == (oval2)) { \ | ||
774 | __this_cpu_write(pcp1, (nval1)); \ | ||
775 | __this_cpu_write(pcp2, (nval2)); \ | ||
776 | __ret = 1; \ | ||
777 | } \ | ||
778 | (__ret); \ | ||
779 | }) | ||
780 | |||
781 | #ifndef __this_cpu_cmpxchg_double | ||
782 | # ifndef __this_cpu_cmpxchg_double_1 | ||
783 | # define __this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
784 | __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
785 | # endif | ||
786 | # ifndef __this_cpu_cmpxchg_double_2 | ||
787 | # define __this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
788 | __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
789 | # endif | ||
790 | # ifndef __this_cpu_cmpxchg_double_4 | ||
791 | # define __this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
792 | __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
793 | # endif | ||
794 | # ifndef __this_cpu_cmpxchg_double_8 | ||
795 | # define __this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
796 | __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
797 | # endif | ||
798 | # define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
799 | __pcpu_double_call_return_bool(__this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) | ||
800 | #endif | ||
801 | |||
706 | /* | 802 | /* |
707 | * IRQ safe versions of the per cpu RMW operations. Note that these operations | 803 | * IRQ safe versions of the per cpu RMW operations. Note that these operations |
708 | * are *not* safe against modification of the same variable from another | 804 | * are *not* safe against modification of the same variable from another |
@@ -823,4 +919,36 @@ do { \ | |||
823 | __pcpu_size_call_return2(irqsafe_cpu_cmpxchg_, (pcp), oval, nval) | 919 | __pcpu_size_call_return2(irqsafe_cpu_cmpxchg_, (pcp), oval, nval) |
824 | #endif | 920 | #endif |
825 | 921 | ||
922 | #define irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
923 | ({ \ | ||
924 | int ret__; \ | ||
925 | unsigned long flags; \ | ||
926 | local_irq_save(flags); \ | ||
927 | ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \ | ||
928 | oval1, oval2, nval1, nval2); \ | ||
929 | local_irq_restore(flags); \ | ||
930 | ret__; \ | ||
931 | }) | ||
932 | |||
933 | #ifndef irqsafe_cpu_cmpxchg_double | ||
934 | # ifndef irqsafe_cpu_cmpxchg_double_1 | ||
935 | # define irqsafe_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
936 | irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
937 | # endif | ||
938 | # ifndef irqsafe_cpu_cmpxchg_double_2 | ||
939 | # define irqsafe_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
940 | irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
941 | # endif | ||
942 | # ifndef irqsafe_cpu_cmpxchg_double_4 | ||
943 | # define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
944 | irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
945 | # endif | ||
946 | # ifndef irqsafe_cpu_cmpxchg_double_8 | ||
947 | # define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
948 | irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) | ||
949 | # endif | ||
950 | # define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ | ||
951 | __pcpu_double_call_return_bool(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) | ||
952 | #endif | ||
953 | |||
826 | #endif /* __LINUX_PERCPU_H */ | 954 | #endif /* __LINUX_PERCPU_H */ |