aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2009-01-13 06:41:35 -0500
committerIngo Molnar <mingo@elte.hu>2009-01-16 08:19:14 -0500
commit3e5d8f978435bb9ba4dfe3f4514e65e7885db1a9 (patch)
tree1ce55b2ec16a0bd59a29857e05215960d463a1d8
parenta698c823e15149941b0f0281527d0c0d1daf2639 (diff)
x86: make percpu symbols zerobased on SMP
[ Based on original patch from Christoph Lameter and Mike Travis. ] This patch makes percpu symbols zerobased on x86_64 SMP by adding PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on the percpu output section and using it in vmlinux_64.lds.S. A new PHDR is added as existing ones cannot contain sections near address zero. PERCPU_VADDR() also adds a new symbol __per_cpu_load which always points to the vaddr of the loaded percpu data.init region. The following adjustments have been made to accommodate the address change. * code to locate percpu gdt_page in head_64.S is updated to add the load address to the gdt_page offset. * __per_cpu_load is used in places where access to the init data area is necessary. * pda->data_offset is initialized soon after C code is entered as zero value doesn't work anymore. This patch is mostly taken from Mike Travis' "x86_64: Base percpu variables at zero" patch. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_64.S24
-rw-r--r--arch/x86/kernel/setup_percpu.c2
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S17
-rw-r--r--include/asm-generic/sections.h2
-rw-r--r--include/asm-generic/vmlinux.lds.h51
6 files changed, 88 insertions, 10 deletions
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b9a4d8c4b935..bc2900ca82c7 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -44,6 +44,8 @@ void __init x86_64_init_pda(void)
44{ 44{
45 _cpu_pda = __cpu_pda; 45 _cpu_pda = __cpu_pda;
46 cpu_pda(0) = &_boot_cpu_pda; 46 cpu_pda(0) = &_boot_cpu_pda;
47 cpu_pda(0)->data_offset =
48 (unsigned long)(__per_cpu_load - __per_cpu_start);
47 pda_init(0); 49 pda_init(0);
48} 50}
49 51
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0e275d495563..7ee0363871e8 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -204,6 +204,23 @@ ENTRY(secondary_startup_64)
204 pushq $0 204 pushq $0
205 popfq 205 popfq
206 206
207#ifdef CONFIG_SMP
208 /*
209 * early_gdt_base should point to the gdt_page in static percpu init
210 * data area. Computing this requires two symbols - __per_cpu_load
211 * and per_cpu__gdt_page. As the linker can't do such a relocation, do
212 * it by hand. As early_gdt_descr is manipulated by C code for
213 * secondary CPUs, this should be done only once for the boot CPU
214 * when early_gdt_descr_base contains zero.
215 */
216 movq early_gdt_descr_base(%rip), %rax
217 testq %rax, %rax
218 jnz 1f
219 movq $__per_cpu_load, %rax
220 addq $per_cpu__gdt_page, %rax
221 movq %rax, early_gdt_descr_base(%rip)
2221:
223#endif
207 /* 224 /*
208 * We must switch to a new descriptor in kernel space for the GDT 225 * We must switch to a new descriptor in kernel space for the GDT
209 * because soon the kernel won't have access anymore to the userspace 226 * because soon the kernel won't have access anymore to the userspace
@@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt)
401 .globl early_gdt_descr 418 .globl early_gdt_descr
402early_gdt_descr: 419early_gdt_descr:
403 .word GDT_ENTRIES*8-1 420 .word GDT_ENTRIES*8-1
404 .quad per_cpu__gdt_page 421#ifdef CONFIG_SMP
422early_gdt_descr_base:
423 .quad 0x0000000000000000
424#else
425 .quad per_cpu__gdt_page
426#endif
405 427
406ENTRY(phys_base) 428ENTRY(phys_base)
407 /* This must match the first entry in level2_kernel_pgt */ 429 /* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 56c63ac62b10..44845842e722 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -213,7 +213,7 @@ void __init setup_per_cpu_areas(void)
213 } 213 }
214#endif 214#endif
215 per_cpu_offset(cpu) = ptr - __per_cpu_start; 215 per_cpu_offset(cpu) = ptr - __per_cpu_start;
216 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 216 memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
217 217
218 DBG("PERCPU: cpu %4d %p\n", cpu, ptr); 218 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
219 } 219 }
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1a614c0e6bef..f50280db0dfe 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -19,6 +19,9 @@ PHDRS {
19 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
20 user PT_LOAD FLAGS(7); /* RWE */ 20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */ 21 data.init PT_LOAD FLAGS(7); /* RWE */
22#ifdef CONFIG_SMP
23 percpu PT_LOAD FLAGS(7); /* RWE */
24#endif
22 note PT_NOTE FLAGS(0); /* ___ */ 25 note PT_NOTE FLAGS(0); /* ___ */
23} 26}
24SECTIONS 27SECTIONS
@@ -208,14 +211,26 @@ SECTIONS
208 __initramfs_end = .; 211 __initramfs_end = .;
209#endif 212#endif
210 213
214#ifdef CONFIG_SMP
215 /*
216 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
217 * output PHDR, so the next output section - __data_nosave - should
218 * switch it back to data.init.
219 */
220 . = ALIGN(PAGE_SIZE);
221 PERCPU_VADDR(0, :percpu)
222#else
211 PERCPU(PAGE_SIZE) 223 PERCPU(PAGE_SIZE)
224#endif
212 225
213 . = ALIGN(PAGE_SIZE); 226 . = ALIGN(PAGE_SIZE);
214 __init_end = .; 227 __init_end = .;
215 228
216 . = ALIGN(PAGE_SIZE); 229 . = ALIGN(PAGE_SIZE);
217 __nosave_begin = .; 230 __nosave_begin = .;
218 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } 231 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
232 *(.data.nosave)
233 } :data.init /* switch back to data.init, see PERCPU_VADDR() above */
219 . = ALIGN(PAGE_SIZE); 234 . = ALIGN(PAGE_SIZE);
220 __nosave_end = .; 235 __nosave_end = .;
221 236
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 79a7ff925bf8..4ce48e878530 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[];
9extern char __init_begin[], __init_end[]; 9extern char __init_begin[], __init_end[];
10extern char _sinittext[], _einittext[]; 10extern char _sinittext[], _einittext[];
11extern char _end[]; 11extern char _end[];
12extern char __per_cpu_start[], __per_cpu_end[]; 12extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
13extern char __kprobes_text_start[], __kprobes_text_end[]; 13extern char __kprobes_text_start[], __kprobes_text_end[];
14extern char __initdata_begin[], __initdata_end[]; 14extern char __initdata_begin[], __initdata_end[];
15extern char __start_rodata[], __end_rodata[]; 15extern char __start_rodata[], __end_rodata[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c61fab1dd2f8..fc2f55f2dcd6 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -430,12 +430,51 @@
430 *(.initcall7.init) \ 430 *(.initcall7.init) \
431 *(.initcall7s.init) 431 *(.initcall7s.init)
432 432
433#define PERCPU(align) \ 433#define PERCPU_PROLOG(vaddr) \
434 . = ALIGN(align); \ 434 VMLINUX_SYMBOL(__per_cpu_load) = .; \
435 VMLINUX_SYMBOL(__per_cpu_start) = .; \ 435 .data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \
436 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ 436 VMLINUX_SYMBOL(__per_cpu_start) = .;
437
438#define PERCPU_EPILOG(phdr) \
439 VMLINUX_SYMBOL(__per_cpu_end) = .; \
440 } phdr \
441 . = __per_cpu_load + SIZEOF(.data.percpu);
442
443/**
444 * PERCPU_VADDR - define output section for percpu area
445 * @vaddr: explicit base address (optional)
446 * @phdr: destination PHDR (optional)
447 *
448 * Macro which expands to output section for percpu area. If @vaddr
449 * is not blank, it specifies explicit base address and all percpu
450 * symbols will be offset from the given address. If blank, @vaddr
451 * always equals @laddr + LOAD_OFFSET.
452 *
453 * @phdr defines the output PHDR to use if not blank. Be warned that
454 * output PHDR is sticky. If @phdr is specified, the next output
455 * section in the linker script will go there too. @phdr should have
456 * a leading colon.
457 *
458 * This macro defines three symbols, __per_cpu_load, __per_cpu_start
459 * and __per_cpu_end. The first one is the vaddr of loaded percpu
460 * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the
461 * end offset.
462 */
463#define PERCPU_VADDR(vaddr, phdr) \
464 PERCPU_PROLOG(vaddr) \
437 *(.data.percpu.page_aligned) \ 465 *(.data.percpu.page_aligned) \
438 *(.data.percpu) \ 466 *(.data.percpu) \
439 *(.data.percpu.shared_aligned) \ 467 *(.data.percpu.shared_aligned) \
440 } \ 468 PERCPU_EPILOG(phdr)
441 VMLINUX_SYMBOL(__per_cpu_end) = .; 469
470/**
471 * PERCPU - define output section for percpu area, simple version
472 * @align: required alignment
473 *
474 * Aligns to @align and outputs the output section for percpu area. This
475 * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and
476 * __per_cpu_start will be identical.
477 */
478#define PERCPU(align) \
479 . = ALIGN(align); \
480 PERCPU_VADDR( , )