author	Vivek Goyal <vgoyal@in.ibm.com>	2007-05-02 13:27:07 -0400
committer	Andi Kleen <andi@basil.nowhere.org>	2007-05-02 13:27:07 -0400
commit	1ab60e0f72f71ec54831e525a3e1154f1c092408 (patch)
tree	bd7dd8bbff43e3e2e3597f2b7780e82a856bb9d7 /arch/x86_64/kernel/head.S
parent	0dbf7028c0c1f266c9631139450a1502d3cd457e (diff)
[PATCH] x86-64: Relocatable Kernel Support
This patch modifies the x86_64 kernel so that it can be loaded and run
at any 2M aligned address, below 512G.  The technique used is to
compile the decompressor with -fPIC and modify it so the decompressor
is fully relocatable.  For the main kernel the page tables are modified
so the kernel remains at the same virtual address.  In addition a
variable phys_base is kept that holds the physical address the kernel
is loaded at.  __pa_symbol is modified to add that when we take the
address of a kernel symbol.

When loaded with a normal bootloader the decompressor will decompress
the kernel to 2M and it will run there.  This both ensures the
relocation code is always working, and makes it easier to use 2M pages
for the kernel and the cpu.

AK: changed to not make RELOCATABLE default in Kconfig

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
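A minimal C sketch of the phys_base/__pa_symbol idea described above
(illustrative only: the *_sketch names and the 0xffffffff80000000 base
are assumptions of this sketch, not code taken from the patch):

	/*
	 * Sketch only, not the patch's actual macro: a kernel symbol's
	 * physical address is its virtual address minus the compile-time
	 * kernel mapping base, plus the offset the kernel was actually
	 * loaded at.  phys_base stays 0 when the kernel runs where it was
	 * compiled to run; head.S below adds the boot-time delta to it.
	 */
	#define START_KERNEL_MAP_SKETCH	0xffffffff80000000UL	/* assumed base */

	static unsigned long phys_base_sketch;	/* fixed up once at early boot */

	static inline unsigned long pa_symbol_sketch(unsigned long vaddr)
	{
		return vaddr - START_KERNEL_MAP_SKETCH + phys_base_sketch;
	}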
Diffstat (limited to 'arch/x86_64/kernel/head.S')
 arch/x86_64/kernel/head.S | 233
 1 files changed, 130 insertions(+), 103 deletions(-)
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index c211e52f1333..36aa98a6d15c 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -5,6 +5,7 @@
  * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
  * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
+ * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
  */
 
 
@@ -17,95 +18,127 @@
 #include <asm/page.h>
 #include <asm/msr.h>
 #include <asm/cache.h>
 
 /* we are not able to switch in one step to the final KERNEL ADRESS SPACE
- * because we need identity-mapped pages on setup so define __START_KERNEL to
- * 0x100000 for this stage
- *
+ * because we need identity-mapped pages.
+ *
  */
 
 	.text
 	.section .bootstrap.text
-	.code32
-	.globl startup_32
-/* %bx:	 1 if coming from smp trampoline on secondary cpu */
-startup_32:
-
+	.code64
+	.globl startup_64
+startup_64:
+
 	/*
-	 * At this point the CPU runs in 32bit protected mode (CS.D = 1) with
-	 * paging disabled and the point of this file is to switch to 64bit
-	 * long mode with a kernel mapping for kerneland to jump into the
-	 * kernel virtual addresses.
-	 * There is no stack until we set one up.
+	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
+	 * and someone has loaded an identity mapped page table
+	 * for us.  These identity mapped page tables map all of the
+	 * kernel pages and possibly all of memory.
+	 *
+	 * %esi holds a physical pointer to real_mode_data.
+	 *
+	 * We come here either directly from a 64bit bootloader, or from
+	 * arch/x86_64/boot/compressed/head.S.
+	 *
+	 * We only come here initially at boot nothing else comes here.
+	 *
+	 * Since we may be loaded at an address different from what we were
+	 * compiled to run at we first fixup the physical addresses in our page
+	 * tables and then reload them.
 	 */
 
-	/* Initialize the %ds segment register */
-	movl	$__KERNEL_DS,%eax
-	movl	%eax,%ds
-
-	/* Load new GDT with the 64bit segments using 32bit descriptor */
-	lgdt	pGDT32 - __START_KERNEL_map
-
-	/* If the CPU doesn't support CPUID this will double fault.
-	 * Unfortunately it is hard to check for CPUID without a stack.
+	/* Compute the delta between the address I am compiled to run at and the
+	 * address I am actually running at.
 	 */
+	leaq	_text(%rip), %rbp
+	subq	$_text - __START_KERNEL_map, %rbp
+
+	/* Is the address not 2M aligned? */
+	movq	%rbp, %rax
+	andl	$~LARGE_PAGE_MASK, %eax
+	testl	%eax, %eax
+	jnz	bad_address
+
+	/* Is the address too large? */
+	leaq	_text(%rip), %rdx
+	movq	$PGDIR_SIZE, %rax
+	cmpq	%rax, %rdx
+	jae	bad_address
 
-	/* Check if extended functions are implemented */
-	movl	$0x80000000, %eax
-	cpuid
-	cmpl	$0x80000000, %eax
-	jbe	no_long_mode
-	/* Check if long mode is implemented */
-	mov	$0x80000001, %eax
-	cpuid
-	btl	$29, %edx
-	jnc	no_long_mode
-
-	/*
-	 * Prepare for entering 64bits mode
+	/* Fixup the physical addresses in the page table
 	 */
+	addq	%rbp, init_level4_pgt + 0(%rip)
+	addq	%rbp, init_level4_pgt + (258*8)(%rip)
+	addq	%rbp, init_level4_pgt + (511*8)(%rip)
+
+	addq	%rbp, level3_ident_pgt + 0(%rip)
+	addq	%rbp, level3_kernel_pgt + (510*8)(%rip)
+
+	/* Add an Identity mapping if I am above 1G */
+	leaq	_text(%rip), %rdi
+	andq	$LARGE_PAGE_MASK, %rdi
+
+	movq	%rdi, %rax
+	shrq	$PUD_SHIFT, %rax
+	andq	$(PTRS_PER_PUD - 1), %rax
+	jz	ident_complete
+
+	leaq	(level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
+	leaq	level3_ident_pgt(%rip), %rbx
+	movq	%rdx, 0(%rbx, %rax, 8)
+
+	movq	%rdi, %rax
+	shrq	$PMD_SHIFT, %rax
+	andq	$(PTRS_PER_PMD - 1), %rax
+	leaq	__PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
+	leaq	level2_spare_pgt(%rip), %rbx
+	movq	%rdx, 0(%rbx, %rax, 8)
+ident_complete:
+
+	/* Fixup the kernel text+data virtual addresses
+	 */
+	leaq	level2_kernel_pgt(%rip), %rdi
+	leaq	4096(%rdi), %r8
+	/* See if it is a valid page table entry */
+1:	testq	$1, 0(%rdi)
+	jz	2f
+	addq	%rbp, 0(%rdi)
+	/* Go to the next page */
+2:	addq	$8, %rdi
+	cmp	%r8, %rdi
+	jne	1b
+
+	/* Fixup phys_base */
+	addq	%rbp, phys_base(%rip)
 
-	/* Enable PAE mode */
-	xorl	%eax, %eax
-	btsl	$5, %eax
-	movl	%eax, %cr4
-
-	/* Setup early boot stage 4 level pagetables */
-	movl	$(init_level4_pgt - __START_KERNEL_map), %eax
-	movl	%eax, %cr3
-
-	/* Setup EFER (Extended Feature Enable Register) */
-	movl	$MSR_EFER, %ecx
-	rdmsr
-
-	/* Enable Long Mode */
-	btsl	$_EFER_LME, %eax
-
-	/* Make changes effective */
-	wrmsr
+#ifdef CONFIG_SMP
+	addq	%rbp, trampoline_level4_pgt + 0(%rip)
+	addq	%rbp, trampoline_level4_pgt + (511*8)(%rip)
+#endif
+#ifdef CONFIG_ACPI_SLEEP
+	addq	%rbp, wakeup_level4_pgt + 0(%rip)
+	addq	%rbp, wakeup_level4_pgt + (511*8)(%rip)
+#endif
 
-	xorl	%eax, %eax
-	btsl	$31, %eax		/* Enable paging and in turn activate Long Mode */
-	btsl	$0, %eax		/* Enable protected mode */
-	/* Make changes effective */
-	movl	%eax, %cr0
-	/*
-	 * At this point we're in long mode but in 32bit compatibility mode
-	 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
-	 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
-	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+	/* Due to ENTRY(), sometimes the empty space gets filled with
+	 * zeros. Better take a jmp than relying on empty space being
+	 * filled with 0x90 (nop)
 	 */
-	ljmp	$__KERNEL_CS, $(startup_64 - __START_KERNEL_map)
-
-	.code64
-	.org 0x100
-	.globl startup_64
-startup_64:
+	jmp secondary_startup_64
 ENTRY(secondary_startup_64)
-	/* We come here either from startup_32
-	 * or directly from a 64bit bootloader.
-	 * Since we may have come directly from a bootloader we
-	 * reload the page tables here.
+	/*
+	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
+	 * and someone has loaded a mapped page table.
+	 *
+	 * %esi holds a physical pointer to real_mode_data.
+	 *
+	 * We come here either from startup_64 (using physical addresses)
+	 * or from trampoline.S (using virtual addresses).
+	 *
+	 * Using virtual addresses from trampoline.S removes the need
+	 * to have any identity mapped pages in the kernel page table
+	 * after the boot processor executes this code.
 	 */
 
 	/* Enable PAE mode and PGE */
@@ -116,8 +149,14 @@ ENTRY(secondary_startup_64)
 
 	/* Setup early boot stage 4 level pagetables. */
 	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+	addq	phys_base(%rip), %rax
 	movq	%rax, %cr3
 
+	/* Ensure I am executing from virtual addresses */
+	movq	$1f, %rax
+	jmp	*%rax
+1:
+
 	/* Check if nx is implemented */
 	movl	$0x80000001, %eax
 	cpuid
@@ -126,17 +165,11 @@ ENTRY(secondary_startup_64)
 	/* Setup EFER (Extended Feature Enable Register) */
 	movl	$MSR_EFER, %ecx
 	rdmsr
-
-	/* Enable System Call */
-	btsl	$_EFER_SCE, %eax
-
-	/* No Execute supported? */
-	btl	$20,%edi
+	btsl	$_EFER_SCE, %eax	/* Enable System Call */
+	btl	$20,%edi		/* No Execute supported? */
 	jnc	1f
 	btsl	$_EFER_NX, %eax
-1:
-	/* Make changes effective */
-	wrmsr
+1:	wrmsr				/* Make changes effective */
 
 	/* Setup cr0 */
 #define CR0_PM				1		/* protected mode */
@@ -163,7 +196,7 @@ ENTRY(secondary_startup_64)
 	 * addresses where we're currently running on. We have to do that here
 	 * because in 32bit we couldn't load a 64bit linear address.
 	 */
-	lgdt	cpu_gdt_descr
+	lgdt	cpu_gdt_descr(%rip)
 
 	/* set up data segments. actually 0 would do too */
 	movl $__KERNEL_DS,%eax
@@ -214,6 +247,9 @@ initial_code:
 init_rsp:
 	.quad  init_thread_union+THREAD_SIZE-8
 
+bad_address:
+	jmp bad_address
+
 ENTRY(early_idt_handler)
 	cmpl $2,early_recursion_flag(%rip)
 	jz  1f
@@ -242,23 +278,7 @@ early_idt_msg:
 early_idt_ripmsg:
 	.asciz "RIP %s\n"
 
-.code32
-ENTRY(no_long_mode)
-	/* This isn't an x86-64 CPU so hang */
-1:
-	jmp	1b
-
-.org 0xf00
-	.globl pGDT32
-pGDT32:
-	.word	gdt_end-cpu_gdt_table-1
-	.long	cpu_gdt_table-__START_KERNEL_map
-
-.org 0xf10
-ljumpvector:
-	.long	startup_64-__START_KERNEL_map
-	.word	__KERNEL_CS
-
+.balign PAGE_SIZE
 ENTRY(stext)
 ENTRY(_stext)
 
@@ -303,7 +323,7 @@ NEXT_PAGE(level2_ident_pgt)
 	 * Don't set NX because code runs from these pages.
 	 */
 	PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
-	
+
 NEXT_PAGE(level2_kernel_pgt)
 	/* 40MB kernel mapping. The kernel code cannot be bigger than that.
 	   When you change this change KERNEL_TEXT_SIZE in page.h too. */
@@ -313,6 +333,9 @@ NEXT_PAGE(level2_kernel_pgt)
 	/* Module mapping starts here */
 	.fill	(PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
 
+NEXT_PAGE(level2_spare_pgt)
+	.fill	512,8,0
+
 #undef PMDS
 #undef NEXT_PAGE
 
@@ -330,6 +353,10 @@ gdt:
 	.endr
 #endif
 
+ENTRY(phys_base)
+	/* This must match the first entry in level2_kernel_pgt */
+	.quad	0x0000000000000000
+
 /* We need valid kernel segments for data and code in long mode too
  * IRET will check the segment types kkeil 2000/10/28
  * Also sysret mandates a special GDT layout
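
For reference, the load-address checks at the top of startup_64 above
reduce to two tests: the kernel must sit on a 2M boundary (so it can be
mapped with 2M pages) and below 512G (the span the pre-built page
tables can cover).  A C sketch under those assumptions (the *_sketch
names and constants are illustrative, not from the patch):

	#include <stdint.h>

	#define LARGE_PAGE_SIZE_SKETCH	(2ULL << 20)	/* 2M, the ~LARGE_PAGE_MASK test */
	#define PGDIR_SIZE_SKETCH	(512ULL << 30)	/* 512G, the PGDIR_SIZE test */

	/* Nonzero if this load address would send startup_64 to bad_address. */
	static int bad_load_address_sketch(uint64_t text_addr)
	{
		if (text_addr & (LARGE_PAGE_SIZE_SKETCH - 1))
			return 1;	/* not 2M aligned */
		if (text_addr >= PGDIR_SIZE_SKETCH)
			return 1;	/* at or above 512G */
		return 0;
	}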