diff options
author | Tejun Heo <tj@kernel.org> | 2009-01-13 06:41:35 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-16 08:19:14 -0500 |
commit | 3e5d8f978435bb9ba4dfe3f4514e65e7885db1a9 (patch) | |
tree | 1ce55b2ec16a0bd59a29857e05215960d463a1d8 /arch | |
parent | a698c823e15149941b0f0281527d0c0d1daf2639 (diff) |
x86: make percpu symbols zerobased on SMP
[ Based on original patch from Christoph Lameter and Mike Travis. ]
This patch makes percpu symbols zerobased on x86_64 SMP by adding
PERCPU_VADDR() to vmlinux.lds.h, which helps set an explicit vaddr on
the percpu output section, and using it in vmlinux_64.lds.S. A new
PHDR is added as existing ones cannot contain sections near address
zero. PERCPU_VADDR() also adds a new symbol __per_cpu_load which
always points to the vaddr of the loaded percpu data.init region.
The following adjustments have been made to accommodate the address
change.
* code to locate percpu gdt_page in head_64.S is updated to add the
load address to the gdt_page offset.
* __per_cpu_load is used in places where access to the init data area
is necessary.
* pda->data_offset is initialized soon after C code is entered, as a
zero value doesn't work anymore.
This patch is mostly taken from Mike Travis' "x86_64: Base percpu
variables at zero" patch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kernel/head64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/head_64.S | 24 | ||||
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux_64.lds.S | 17 |
4 files changed, 42 insertions, 3 deletions
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b9a4d8c4b935..bc2900ca82c7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -44,6 +44,8 @@ void __init x86_64_init_pda(void) | |||
44 | { | 44 | { |
45 | _cpu_pda = __cpu_pda; | 45 | _cpu_pda = __cpu_pda; |
46 | cpu_pda(0) = &_boot_cpu_pda; | 46 | cpu_pda(0) = &_boot_cpu_pda; |
47 | cpu_pda(0)->data_offset = | ||
48 | (unsigned long)(__per_cpu_load - __per_cpu_start); | ||
47 | pda_init(0); | 49 | pda_init(0); |
48 | } | 50 | } |
49 | 51 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0e275d495563..7ee0363871e8 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -204,6 +204,23 @@ ENTRY(secondary_startup_64) | |||
204 | pushq $0 | 204 | pushq $0 |
205 | popfq | 205 | popfq |
206 | 206 | ||
207 | #ifdef CONFIG_SMP | ||
208 | /* | ||
209 | * early_gdt_base should point to the gdt_page in static percpu init | ||
210 | * data area. Computing this requires two symbols - __per_cpu_load | ||
211 | * and per_cpu__gdt_page. As linker can't do no such relocation, do | ||
212 | * it by hand. As early_gdt_descr is manipulated by C code for | ||
213 | * secondary CPUs, this should be done only once for the boot CPU | ||
214 | * when early_gdt_descr_base contains zero. | ||
215 | */ | ||
216 | movq early_gdt_descr_base(%rip), %rax | ||
217 | testq %rax, %rax | ||
218 | jnz 1f | ||
219 | movq $__per_cpu_load, %rax | ||
220 | addq $per_cpu__gdt_page, %rax | ||
221 | movq %rax, early_gdt_descr_base(%rip) | ||
222 | 1: | ||
223 | #endif | ||
207 | /* | 224 | /* |
208 | * We must switch to a new descriptor in kernel space for the GDT | 225 | * We must switch to a new descriptor in kernel space for the GDT |
209 | * because soon the kernel won't have access anymore to the userspace | 226 | * because soon the kernel won't have access anymore to the userspace |
@@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt) | |||
401 | .globl early_gdt_descr | 418 | .globl early_gdt_descr |
402 | early_gdt_descr: | 419 | early_gdt_descr: |
403 | .word GDT_ENTRIES*8-1 | 420 | .word GDT_ENTRIES*8-1 |
404 | .quad per_cpu__gdt_page | 421 | #ifdef CONFIG_SMP |
422 | early_gdt_descr_base: | ||
423 | .quad 0x0000000000000000 | ||
424 | #else | ||
425 | .quad per_cpu__gdt_page | ||
426 | #endif | ||
405 | 427 | ||
406 | ENTRY(phys_base) | 428 | ENTRY(phys_base) |
407 | /* This must match the first entry in level2_kernel_pgt */ | 429 | /* This must match the first entry in level2_kernel_pgt */ |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 56c63ac62b10..44845842e722 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -213,7 +213,7 @@ void __init setup_per_cpu_areas(void) | |||
213 | } | 213 | } |
214 | #endif | 214 | #endif |
215 | per_cpu_offset(cpu) = ptr - __per_cpu_start; | 215 | per_cpu_offset(cpu) = ptr - __per_cpu_start; |
216 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | 216 | memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); |
217 | 217 | ||
218 | DBG("PERCPU: cpu %4d %p\n", cpu, ptr); | 218 | DBG("PERCPU: cpu %4d %p\n", cpu, ptr); |
219 | } | 219 | } |
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 1a614c0e6bef..f50280db0dfe 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -19,6 +19,9 @@ PHDRS { | |||
19 | data PT_LOAD FLAGS(7); /* RWE */ | 19 | data PT_LOAD FLAGS(7); /* RWE */ |
20 | user PT_LOAD FLAGS(7); /* RWE */ | 20 | user PT_LOAD FLAGS(7); /* RWE */ |
21 | data.init PT_LOAD FLAGS(7); /* RWE */ | 21 | data.init PT_LOAD FLAGS(7); /* RWE */ |
22 | #ifdef CONFIG_SMP | ||
23 | percpu PT_LOAD FLAGS(7); /* RWE */ | ||
24 | #endif | ||
22 | note PT_NOTE FLAGS(0); /* ___ */ | 25 | note PT_NOTE FLAGS(0); /* ___ */ |
23 | } | 26 | } |
24 | SECTIONS | 27 | SECTIONS |
@@ -208,14 +211,26 @@ SECTIONS | |||
208 | __initramfs_end = .; | 211 | __initramfs_end = .; |
209 | #endif | 212 | #endif |
210 | 213 | ||
214 | #ifdef CONFIG_SMP | ||
215 | /* | ||
216 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the | ||
217 | * output PHDR, so the next output section - __data_nosave - should | ||
218 | * switch it back to data.init. | ||
219 | */ | ||
220 | . = ALIGN(PAGE_SIZE); | ||
221 | PERCPU_VADDR(0, :percpu) | ||
222 | #else | ||
211 | PERCPU(PAGE_SIZE) | 223 | PERCPU(PAGE_SIZE) |
224 | #endif | ||
212 | 225 | ||
213 | . = ALIGN(PAGE_SIZE); | 226 | . = ALIGN(PAGE_SIZE); |
214 | __init_end = .; | 227 | __init_end = .; |
215 | 228 | ||
216 | . = ALIGN(PAGE_SIZE); | 229 | . = ALIGN(PAGE_SIZE); |
217 | __nosave_begin = .; | 230 | __nosave_begin = .; |
218 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } | 231 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { |
232 | *(.data.nosave) | ||
233 | } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ | ||
219 | . = ALIGN(PAGE_SIZE); | 234 | . = ALIGN(PAGE_SIZE); |
220 | __nosave_end = .; | 235 | __nosave_end = .; |
221 | 236 | ||