diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 7 | ||||
-rw-r--r-- | arch/x86/kernel/machine_kexec_32.c | 27 | ||||
-rw-r--r-- | arch/x86/kernel/machine_kexec_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/relocate_kernel_32.S | 174 |
4 files changed, 179 insertions, 31 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e3cba0b45600..7ecb679f0130 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1279,6 +1279,13 @@ config CRASH_DUMP | |||
1279 | (CONFIG_RELOCATABLE=y). | 1279 | (CONFIG_RELOCATABLE=y). |
1280 | For more details see Documentation/kdump/kdump.txt | 1280 | For more details see Documentation/kdump/kdump.txt |
1281 | 1281 | ||
1282 | config KEXEC_JUMP | ||
1283 | bool "kexec jump (EXPERIMENTAL)" | ||
1284 | depends on EXPERIMENTAL | ||
1285 | depends on KEXEC && PM_SLEEP && X86_32 | ||
1286 | help | ||
1287 | Invoke code in physical address mode via KEXEC | ||
1288 | |||
1282 | config PHYSICAL_START | 1289 | config PHYSICAL_START |
1283 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) | 1290 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) |
1284 | default "0x1000000" if X86_NUMAQ | 1291 | default "0x1000000" if X86_NUMAQ |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 8864230d55af..2b67609d0a1c 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <asm/cpufeature.h> | 22 | #include <asm/cpufeature.h> |
23 | #include <asm/desc.h> | 23 | #include <asm/desc.h> |
24 | #include <asm/system.h> | 24 | #include <asm/system.h> |
25 | #include <asm/cacheflush.h> | ||
25 | 26 | ||
26 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | 27 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) |
27 | static u32 kexec_pgd[1024] PAGE_ALIGNED; | 28 | static u32 kexec_pgd[1024] PAGE_ALIGNED; |
@@ -85,10 +86,12 @@ static void load_segments(void) | |||
85 | * reboot code buffer to allow us to avoid allocations | 86 | * reboot code buffer to allow us to avoid allocations |
86 | * later. | 87 | * later. |
87 | * | 88 | * |
88 | * Currently nothing. | 89 | * Make control page executable. |
89 | */ | 90 | */ |
90 | int machine_kexec_prepare(struct kimage *image) | 91 | int machine_kexec_prepare(struct kimage *image) |
91 | { | 92 | { |
93 | if (nx_enabled) | ||
94 | set_pages_x(image->control_code_page, 1); | ||
92 | return 0; | 95 | return 0; |
93 | } | 96 | } |
94 | 97 | ||
@@ -98,16 +101,24 @@ int machine_kexec_prepare(struct kimage *image) | |||
98 | */ | 101 | */ |
99 | void machine_kexec_cleanup(struct kimage *image) | 102 | void machine_kexec_cleanup(struct kimage *image) |
100 | { | 103 | { |
104 | if (nx_enabled) | ||
105 | set_pages_nx(image->control_code_page, 1); | ||
101 | } | 106 | } |
102 | 107 | ||
103 | /* | 108 | /* |
104 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 109 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
105 | * We are past the point of no return, committed to rebooting now. | 110 | * We are past the point of no return, committed to rebooting now. |
106 | */ | 111 | */ |
107 | NORET_TYPE void machine_kexec(struct kimage *image) | 112 | void machine_kexec(struct kimage *image) |
108 | { | 113 | { |
109 | unsigned long page_list[PAGES_NR]; | 114 | unsigned long page_list[PAGES_NR]; |
110 | void *control_page; | 115 | void *control_page; |
116 | asmlinkage unsigned long | ||
117 | (*relocate_kernel_ptr)(unsigned long indirection_page, | ||
118 | unsigned long control_page, | ||
119 | unsigned long start_address, | ||
120 | unsigned int has_pae, | ||
121 | unsigned int preserve_context); | ||
111 | 122 | ||
112 | tracer_disable(); | 123 | tracer_disable(); |
113 | 124 | ||
@@ -115,10 +126,11 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
115 | local_irq_disable(); | 126 | local_irq_disable(); |
116 | 127 | ||
117 | control_page = page_address(image->control_code_page); | 128 | control_page = page_address(image->control_code_page); |
118 | memcpy(control_page, relocate_kernel, PAGE_SIZE); | 129 | memcpy(control_page, relocate_kernel, PAGE_SIZE/2); |
119 | 130 | ||
131 | relocate_kernel_ptr = control_page; | ||
120 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 132 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
121 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | 133 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
122 | page_list[PA_PGD] = __pa(kexec_pgd); | 134 | page_list[PA_PGD] = __pa(kexec_pgd); |
123 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | 135 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
124 | #ifdef CONFIG_X86_PAE | 136 | #ifdef CONFIG_X86_PAE |
@@ -131,6 +143,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
131 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | 143 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
132 | page_list[PA_PTE_1] = __pa(kexec_pte1); | 144 | page_list[PA_PTE_1] = __pa(kexec_pte1); |
133 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | 145 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; |
146 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); | ||
134 | 147 | ||
135 | /* The segment registers are funny things, they have both a | 148 | /* The segment registers are funny things, they have both a |
136 | * visible and an invisible part. Whenever the visible part is | 149 | * visible and an invisible part. Whenever the visible part is |
@@ -149,8 +162,10 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
149 | set_idt(phys_to_virt(0),0); | 162 | set_idt(phys_to_virt(0),0); |
150 | 163 | ||
151 | /* now call it */ | 164 | /* now call it */ |
152 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, | 165 | image->start = relocate_kernel_ptr((unsigned long)image->head, |
153 | image->start, cpu_has_pae); | 166 | (unsigned long)page_list, |
167 | image->start, cpu_has_pae, | ||
168 | image->preserve_context); | ||
154 | } | 169 | } |
155 | 170 | ||
156 | void arch_crash_save_vmcoreinfo(void) | 171 | void arch_crash_save_vmcoreinfo(void) |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9dd9262693a3..c43caa3a91f3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image) | |||
181 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 181 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
182 | * We are past the point of no return, committed to rebooting now. | 182 | * We are past the point of no return, committed to rebooting now. |
183 | */ | 183 | */ |
184 | NORET_TYPE void machine_kexec(struct kimage *image) | 184 | void machine_kexec(struct kimage *image) |
185 | { | 185 | { |
186 | unsigned long page_list[PAGES_NR]; | 186 | unsigned long page_list[PAGES_NR]; |
187 | void *control_page; | 187 | void *control_page; |
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index c30fe25d470d..703310a99023 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
@@ -20,11 +20,44 @@ | |||
20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) | 21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) |
22 | 22 | ||
23 | /* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are | ||
24 | * used to save some data for jumping back | ||
25 | */ | ||
26 | #define DATA(offset) (PAGE_SIZE/2+(offset)) | ||
27 | |||
28 | /* Minimal CPU state */ | ||
29 | #define ESP DATA(0x0) | ||
30 | #define CR0 DATA(0x4) | ||
31 | #define CR3 DATA(0x8) | ||
32 | #define CR4 DATA(0xc) | ||
33 | |||
34 | /* other data */ | ||
35 | #define CP_VA_CONTROL_PAGE DATA(0x10) | ||
36 | #define CP_PA_PGD DATA(0x14) | ||
37 | #define CP_PA_SWAP_PAGE DATA(0x18) | ||
38 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) | ||
39 | |||
23 | .text | 40 | .text |
24 | .align PAGE_SIZE | 41 | .align PAGE_SIZE |
25 | .globl relocate_kernel | 42 | .globl relocate_kernel |
26 | relocate_kernel: | 43 | relocate_kernel: |
27 | movl 8(%esp), %ebp /* list of pages */ | 44 | /* Save the CPU context, used for jumping back */ |
45 | |||
46 | pushl %ebx | ||
47 | pushl %esi | ||
48 | pushl %edi | ||
49 | pushl %ebp | ||
50 | pushf | ||
51 | |||
52 | movl 20+8(%esp), %ebp /* list of pages */ | ||
53 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
54 | movl %esp, ESP(%edi) | ||
55 | movl %cr0, %eax | ||
56 | movl %eax, CR0(%edi) | ||
57 | movl %cr3, %eax | ||
58 | movl %eax, CR3(%edi) | ||
59 | movl %cr4, %eax | ||
60 | movl %eax, CR4(%edi) | ||
28 | 61 | ||
29 | #ifdef CONFIG_X86_PAE | 62 | #ifdef CONFIG_X86_PAE |
30 | /* map the control page at its virtual address */ | 63 | /* map the control page at its virtual address */ |
@@ -138,15 +171,25 @@ relocate_kernel: | |||
138 | 171 | ||
139 | relocate_new_kernel: | 172 | relocate_new_kernel: |
140 | /* read the arguments and say goodbye to the stack */ | 173 | /* read the arguments and say goodbye to the stack */ |
141 | movl 4(%esp), %ebx /* page_list */ | 174 | movl 20+4(%esp), %ebx /* page_list */ |
142 | movl 8(%esp), %ebp /* list of pages */ | 175 | movl 20+8(%esp), %ebp /* list of pages */ |
143 | movl 12(%esp), %edx /* start address */ | 176 | movl 20+12(%esp), %edx /* start address */ |
144 | movl 16(%esp), %ecx /* cpu_has_pae */ | 177 | movl 20+16(%esp), %ecx /* cpu_has_pae */ |
178 | movl 20+20(%esp), %esi /* preserve_context */ | ||
145 | 179 | ||
146 | /* zero out flags, and disable interrupts */ | 180 | /* zero out flags, and disable interrupts */ |
147 | pushl $0 | 181 | pushl $0 |
148 | popfl | 182 | popfl |
149 | 183 | ||
184 | /* save some information for jumping back */ | ||
185 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
186 | movl %edi, CP_VA_CONTROL_PAGE(%edi) | ||
187 | movl PTR(PA_PGD)(%ebp), %eax | ||
188 | movl %eax, CP_PA_PGD(%edi) | ||
189 | movl PTR(PA_SWAP_PAGE)(%ebp), %eax | ||
190 | movl %eax, CP_PA_SWAP_PAGE(%edi) | ||
191 | movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) | ||
192 | |||
150 | /* get physical address of control page now */ | 193 | /* get physical address of control page now */ |
151 | /* this is impossible after page table switch */ | 194 | /* this is impossible after page table switch */ |
152 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi | 195 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi |
@@ -197,8 +240,90 @@ identity_mapped: | |||
197 | xorl %eax, %eax | 240 | xorl %eax, %eax |
198 | movl %eax, %cr3 | 241 | movl %eax, %cr3 |
199 | 242 | ||
243 | movl CP_PA_SWAP_PAGE(%edi), %eax | ||
244 | pushl %eax | ||
245 | pushl %ebx | ||
246 | call swap_pages | ||
247 | addl $8, %esp | ||
248 | |||
249 | /* To be certain of avoiding problems with self-modifying code | ||
250 | * I need to execute a serializing instruction here. | ||
251 | * So I flush the TLB, it's handy, and not processor dependent. | ||
252 | */ | ||
253 | xorl %eax, %eax | ||
254 | movl %eax, %cr3 | ||
255 | |||
256 | /* set all of the registers to known values */ | ||
257 | /* leave %esp alone */ | ||
258 | |||
259 | testl %esi, %esi | ||
260 | jnz 1f | ||
261 | xorl %edi, %edi | ||
262 | xorl %eax, %eax | ||
263 | xorl %ebx, %ebx | ||
264 | xorl %ecx, %ecx | ||
265 | xorl %edx, %edx | ||
266 | xorl %esi, %esi | ||
267 | xorl %ebp, %ebp | ||
268 | ret | ||
269 | 1: | ||
270 | popl %edx | ||
271 | movl CP_PA_SWAP_PAGE(%edi), %esp | ||
272 | addl $PAGE_SIZE, %esp | ||
273 | 2: | ||
274 | call *%edx | ||
275 | |||
276 | /* get the re-entry point of the peer system */ | ||
277 | movl 0(%esp), %ebp | ||
278 | call 1f | ||
279 | 1: | ||
280 | popl %ebx | ||
281 | subl $(1b - relocate_kernel), %ebx | ||
282 | movl CP_VA_CONTROL_PAGE(%ebx), %edi | ||
283 | lea PAGE_SIZE(%ebx), %esp | ||
284 | movl CP_PA_SWAP_PAGE(%ebx), %eax | ||
285 | movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx | ||
286 | pushl %eax | ||
287 | pushl %edx | ||
288 | call swap_pages | ||
289 | addl $8, %esp | ||
290 | movl CP_PA_PGD(%ebx), %eax | ||
291 | movl %eax, %cr3 | ||
292 | movl %cr0, %eax | ||
293 | orl $(1<<31), %eax | ||
294 | movl %eax, %cr0 | ||
295 | lea PAGE_SIZE(%edi), %esp | ||
296 | movl %edi, %eax | ||
297 | addl $(virtual_mapped - relocate_kernel), %eax | ||
298 | pushl %eax | ||
299 | ret | ||
300 | |||
301 | virtual_mapped: | ||
302 | movl CR4(%edi), %eax | ||
303 | movl %eax, %cr4 | ||
304 | movl CR3(%edi), %eax | ||
305 | movl %eax, %cr3 | ||
306 | movl CR0(%edi), %eax | ||
307 | movl %eax, %cr0 | ||
308 | movl ESP(%edi), %esp | ||
309 | movl %ebp, %eax | ||
310 | |||
311 | popf | ||
312 | popl %ebp | ||
313 | popl %edi | ||
314 | popl %esi | ||
315 | popl %ebx | ||
316 | ret | ||
317 | |||
200 | /* Do the copies */ | 318 | /* Do the copies */ |
201 | movl %ebx, %ecx | 319 | swap_pages: |
320 | movl 8(%esp), %edx | ||
321 | movl 4(%esp), %ecx | ||
322 | pushl %ebp | ||
323 | pushl %ebx | ||
324 | pushl %edi | ||
325 | pushl %esi | ||
326 | movl %ecx, %ebx | ||
202 | jmp 1f | 327 | jmp 1f |
203 | 328 | ||
204 | 0: /* top, read another word from the indirection page */ | 329 | 0: /* top, read another word from the indirection page */ |
@@ -226,27 +351,28 @@ identity_mapped: | |||
226 | movl %ecx, %esi /* For every source page do a copy */ | 351 | movl %ecx, %esi /* For every source page do a copy */ |
227 | andl $0xfffff000, %esi | 352 | andl $0xfffff000, %esi |
228 | 353 | ||
354 | movl %edi, %eax | ||
355 | movl %esi, %ebp | ||
356 | |||
357 | movl %edx, %edi | ||
229 | movl $1024, %ecx | 358 | movl $1024, %ecx |
230 | rep ; movsl | 359 | rep ; movsl |
231 | jmp 0b | ||
232 | 360 | ||
233 | 3: | 361 | movl %ebp, %edi |
234 | 362 | movl %eax, %esi | |
235 | /* To be certain of avoiding problems with self-modifying code | 363 | movl $1024, %ecx |
236 | * I need to execute a serializing instruction here. | 364 | rep ; movsl |
237 | * So I flush the TLB, it's handy, and not processor dependent. | ||
238 | */ | ||
239 | xorl %eax, %eax | ||
240 | movl %eax, %cr3 | ||
241 | 365 | ||
242 | /* set all of the registers to known values */ | 366 | movl %eax, %edi |
243 | /* leave %esp alone */ | 367 | movl %edx, %esi |
368 | movl $1024, %ecx | ||
369 | rep ; movsl | ||
244 | 370 | ||
245 | xorl %eax, %eax | 371 | lea PAGE_SIZE(%ebp), %esi |
246 | xorl %ebx, %ebx | 372 | jmp 0b |
247 | xorl %ecx, %ecx | 373 | 3: |
248 | xorl %edx, %edx | 374 | popl %esi |
249 | xorl %esi, %esi | 375 | popl %edi |
250 | xorl %edi, %edi | 376 | popl %ebx |
251 | xorl %ebp, %ebp | 377 | popl %ebp |
252 | ret | 378 | ret |