diff options
author | Tejun Heo <tj@kernel.org> | 2009-01-13 06:41:35 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-16 08:19:14 -0500 |
commit | 3e5d8f978435bb9ba4dfe3f4514e65e7885db1a9 (patch) | |
tree | 1ce55b2ec16a0bd59a29857e05215960d463a1d8 | |
parent | a698c823e15149941b0f0281527d0c0d1daf2639 (diff) |
x86: make percpu symbols zerobased on SMP
[ Based on original patch from Christoph Lameter and Mike Travis. ]
This patch makes percpu symbols zerobased on x86_64 SMP by adding
PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on
the percpu output section and using it in vmlinux_64.lds.S. A new
PHDR is added as existing ones cannot contain sections near address
zero. PERCPU_VADDR() also adds a new symbol __per_cpu_load which
always points to the vaddr of the loaded percpu data.init region.
The following adjustments have been made to accommodate the address
change.
* code to locate percpu gdt_page in head_64.S is updated to add the
load address to the gdt_page offset.
* __per_cpu_load is used in places where access to the init data area
is necessary.
* pda->data_offset is initialized soon after C code is entered as zero
value doesn't work anymore.
This patch is mostly taken from Mike Travis' "x86_64: Base percpu
variables at zero" patch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/kernel/head64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/head_64.S | 24 | ||||
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux_64.lds.S | 17 | ||||
-rw-r--r-- | include/asm-generic/sections.h | 2 | ||||
-rw-r--r-- | include/asm-generic/vmlinux.lds.h | 51 |
6 files changed, 88 insertions, 10 deletions
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b9a4d8c4b935..bc2900ca82c7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -44,6 +44,8 @@ void __init x86_64_init_pda(void) | |||
44 | { | 44 | { |
45 | _cpu_pda = __cpu_pda; | 45 | _cpu_pda = __cpu_pda; |
46 | cpu_pda(0) = &_boot_cpu_pda; | 46 | cpu_pda(0) = &_boot_cpu_pda; |
47 | cpu_pda(0)->data_offset = | ||
48 | (unsigned long)(__per_cpu_load - __per_cpu_start); | ||
47 | pda_init(0); | 49 | pda_init(0); |
48 | } | 50 | } |
49 | 51 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0e275d495563..7ee0363871e8 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -204,6 +204,23 @@ ENTRY(secondary_startup_64) | |||
204 | pushq $0 | 204 | pushq $0 |
205 | popfq | 205 | popfq |
206 | 206 | ||
207 | #ifdef CONFIG_SMP | ||
208 | /* | ||
209 | * early_gdt_descr_base should point to the gdt_page in static percpu init | ||
210 | * data area. Computing this requires two symbols - __per_cpu_load | ||
211 | * and per_cpu__gdt_page. As the linker can't do such a relocation, do | ||
212 | * it by hand. As early_gdt_descr is manipulated by C code for | ||
213 | * secondary CPUs, this should be done only once for the boot CPU | ||
214 | * when early_gdt_descr_base contains zero. | ||
215 | */ | ||
216 | movq early_gdt_descr_base(%rip), %rax | ||
217 | testq %rax, %rax | ||
218 | jnz 1f | ||
219 | movq $__per_cpu_load, %rax | ||
220 | addq $per_cpu__gdt_page, %rax | ||
221 | movq %rax, early_gdt_descr_base(%rip) | ||
222 | 1: | ||
223 | #endif | ||
207 | /* | 224 | /* |
208 | * We must switch to a new descriptor in kernel space for the GDT | 225 | * We must switch to a new descriptor in kernel space for the GDT |
209 | * because soon the kernel won't have access anymore to the userspace | 226 | * because soon the kernel won't have access anymore to the userspace |
@@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt) | |||
401 | .globl early_gdt_descr | 418 | .globl early_gdt_descr |
402 | early_gdt_descr: | 419 | early_gdt_descr: |
403 | .word GDT_ENTRIES*8-1 | 420 | .word GDT_ENTRIES*8-1 |
404 | .quad per_cpu__gdt_page | 421 | #ifdef CONFIG_SMP |
422 | early_gdt_descr_base: | ||
423 | .quad 0x0000000000000000 | ||
424 | #else | ||
425 | .quad per_cpu__gdt_page | ||
426 | #endif | ||
405 | 427 | ||
406 | ENTRY(phys_base) | 428 | ENTRY(phys_base) |
407 | /* This must match the first entry in level2_kernel_pgt */ | 429 | /* This must match the first entry in level2_kernel_pgt */ |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 56c63ac62b10..44845842e722 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -213,7 +213,7 @@ void __init setup_per_cpu_areas(void) | |||
213 | } | 213 | } |
214 | #endif | 214 | #endif |
215 | per_cpu_offset(cpu) = ptr - __per_cpu_start; | 215 | per_cpu_offset(cpu) = ptr - __per_cpu_start; |
216 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | 216 | memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); |
217 | 217 | ||
218 | DBG("PERCPU: cpu %4d %p\n", cpu, ptr); | 218 | DBG("PERCPU: cpu %4d %p\n", cpu, ptr); |
219 | } | 219 | } |
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 1a614c0e6bef..f50280db0dfe 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -19,6 +19,9 @@ PHDRS { | |||
19 | data PT_LOAD FLAGS(7); /* RWE */ | 19 | data PT_LOAD FLAGS(7); /* RWE */ |
20 | user PT_LOAD FLAGS(7); /* RWE */ | 20 | user PT_LOAD FLAGS(7); /* RWE */ |
21 | data.init PT_LOAD FLAGS(7); /* RWE */ | 21 | data.init PT_LOAD FLAGS(7); /* RWE */ |
22 | #ifdef CONFIG_SMP | ||
23 | percpu PT_LOAD FLAGS(7); /* RWE */ | ||
24 | #endif | ||
22 | note PT_NOTE FLAGS(0); /* ___ */ | 25 | note PT_NOTE FLAGS(0); /* ___ */ |
23 | } | 26 | } |
24 | SECTIONS | 27 | SECTIONS |
@@ -208,14 +211,26 @@ SECTIONS | |||
208 | __initramfs_end = .; | 211 | __initramfs_end = .; |
209 | #endif | 212 | #endif |
210 | 213 | ||
214 | #ifdef CONFIG_SMP | ||
215 | /* | ||
216 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the | ||
217 | * output PHDR, so the next output section - __data_nosave - should | ||
218 | * switch it back to data.init. | ||
219 | */ | ||
220 | . = ALIGN(PAGE_SIZE); | ||
221 | PERCPU_VADDR(0, :percpu) | ||
222 | #else | ||
211 | PERCPU(PAGE_SIZE) | 223 | PERCPU(PAGE_SIZE) |
224 | #endif | ||
212 | 225 | ||
213 | . = ALIGN(PAGE_SIZE); | 226 | . = ALIGN(PAGE_SIZE); |
214 | __init_end = .; | 227 | __init_end = .; |
215 | 228 | ||
216 | . = ALIGN(PAGE_SIZE); | 229 | . = ALIGN(PAGE_SIZE); |
217 | __nosave_begin = .; | 230 | __nosave_begin = .; |
218 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } | 231 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { |
232 | *(.data.nosave) | ||
233 | } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ | ||
219 | . = ALIGN(PAGE_SIZE); | 234 | . = ALIGN(PAGE_SIZE); |
220 | __nosave_end = .; | 235 | __nosave_end = .; |
221 | 236 | ||
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 79a7ff925bf8..4ce48e878530 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h | |||
@@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[]; | |||
9 | extern char __init_begin[], __init_end[]; | 9 | extern char __init_begin[], __init_end[]; |
10 | extern char _sinittext[], _einittext[]; | 10 | extern char _sinittext[], _einittext[]; |
11 | extern char _end[]; | 11 | extern char _end[]; |
12 | extern char __per_cpu_start[], __per_cpu_end[]; | 12 | extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; |
13 | extern char __kprobes_text_start[], __kprobes_text_end[]; | 13 | extern char __kprobes_text_start[], __kprobes_text_end[]; |
14 | extern char __initdata_begin[], __initdata_end[]; | 14 | extern char __initdata_begin[], __initdata_end[]; |
15 | extern char __start_rodata[], __end_rodata[]; | 15 | extern char __start_rodata[], __end_rodata[]; |
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c61fab1dd2f8..fc2f55f2dcd6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h | |||
@@ -430,12 +430,51 @@ | |||
430 | *(.initcall7.init) \ | 430 | *(.initcall7.init) \ |
431 | *(.initcall7s.init) | 431 | *(.initcall7s.init) |
432 | 432 | ||
433 | #define PERCPU(align) \ | 433 | #define PERCPU_PROLOG(vaddr) \ |
434 | . = ALIGN(align); \ | 434 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ |
435 | VMLINUX_SYMBOL(__per_cpu_start) = .; \ | 435 | .data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ |
436 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ | 436 | VMLINUX_SYMBOL(__per_cpu_start) = .; |
437 | |||
438 | #define PERCPU_EPILOG(phdr) \ | ||
439 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ | ||
440 | } phdr \ | ||
441 | . = __per_cpu_load + SIZEOF(.data.percpu); | ||
442 | |||
443 | /** | ||
444 | * PERCPU_VADDR - define output section for percpu area | ||
445 | * @vaddr: explicit base address (optional) | ||
446 | * @phdr: destination PHDR (optional) | ||
447 | * | ||
448 | * Macro which expands to output section for percpu area. If @vaddr | ||
449 | * is not blank, it specifies explicit base address and all percpu | ||
450 | * symbols will be offset from the given address. If blank, @vaddr | ||
451 | * always equals @laddr + LOAD_OFFSET. | ||
452 | * | ||
453 | * @phdr defines the output PHDR to use if not blank. Be warned that | ||
454 | * output PHDR is sticky. If @phdr is specified, the next output | ||
455 | * section in the linker script will go there too. @phdr should have | ||
456 | * a leading colon. | ||
457 | * | ||
458 | * This macro defines three symbols, __per_cpu_load, __per_cpu_start | ||
459 | * and __per_cpu_end. The first one is the vaddr of loaded percpu | ||
460 | * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the | ||
461 | * end offset. | ||
462 | */ | ||
463 | #define PERCPU_VADDR(vaddr, phdr) \ | ||
464 | PERCPU_PROLOG(vaddr) \ | ||
437 | *(.data.percpu.page_aligned) \ | 465 | *(.data.percpu.page_aligned) \ |
438 | *(.data.percpu) \ | 466 | *(.data.percpu) \ |
439 | *(.data.percpu.shared_aligned) \ | 467 | *(.data.percpu.shared_aligned) \ |
440 | } \ | 468 | PERCPU_EPILOG(phdr) |
441 | VMLINUX_SYMBOL(__per_cpu_end) = .; | 469 | |
470 | /** | ||
471 | * PERCPU - define output section for percpu area, simple version | ||
472 | * @align: required alignment | ||
473 | * | ||
474 | * Align to @align and outputs output section for percpu area. This | ||
475 | * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and | ||
476 | * __per_cpu_start will be identical. | ||
477 | */ | ||
478 | #define PERCPU(align) \ | ||
479 | . = ALIGN(align); \ | ||
480 | PERCPU_VADDR( , ) | ||