aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHuang Ying <ying.huang@intel.com>2009-03-09 22:57:16 -0400
committerH. Peter Anvin <hpa@zytor.com>2009-03-10 21:13:25 -0400
commitfee7b0d84cc8c7bc5dc212901c79e93eaf83a5b5 (patch)
treef855b0b5057c3dff7e26c840218cb22bfe965a7a
parent5359454701ce51a4626b1ef6eb7b16ec35bd458d (diff)
x86, kexec: x86_64: add kexec jump support for x86_64
Impact: New major feature This patch add kexec jump support for x86_64. More information about kexec jump can be found in corresponding x86_32 support patch. Signed-off-by: Huang Ying <ying.huang@intel.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/kexec.h13
-rw-r--r--arch/x86/kernel/machine_kexec_64.c42
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S177
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S7
5 files changed, 197 insertions, 44 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 31758378bcd2..87717f3687d2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1431,7 +1431,7 @@ config CRASH_DUMP
1431config KEXEC_JUMP 1431config KEXEC_JUMP
1432 bool "kexec jump (EXPERIMENTAL)" 1432 bool "kexec jump (EXPERIMENTAL)"
1433 depends on EXPERIMENTAL 1433 depends on EXPERIMENTAL
1434 depends on KEXEC && HIBERNATION && X86_32 1434 depends on KEXEC && HIBERNATION
1435 ---help--- 1435 ---help---
1436 Jump between original kernel and kexeced kernel and invoke 1436 Jump between original kernel and kexeced kernel and invoke
1437 code in physical address mode via KEXEC 1437 code in physical address mode via KEXEC
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 0ceb6d19ed30..317ff1703d0b 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -9,13 +9,13 @@
9# define PAGES_NR 4 9# define PAGES_NR 4
10#else 10#else
11# define PA_CONTROL_PAGE 0 11# define PA_CONTROL_PAGE 0
12# define PA_TABLE_PAGE 1 12# define VA_CONTROL_PAGE 1
13# define PAGES_NR 2 13# define PA_TABLE_PAGE 2
14# define PA_SWAP_PAGE 3
15# define PAGES_NR 4
14#endif 16#endif
15 17
16#ifdef CONFIG_X86_32
17# define KEXEC_CONTROL_CODE_MAX_SIZE 2048 18# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
18#endif
19 19
20#ifndef __ASSEMBLY__ 20#ifndef __ASSEMBLY__
21 21
@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
136 unsigned int has_pae, 136 unsigned int has_pae,
137 unsigned int preserve_context); 137 unsigned int preserve_context);
138#else 138#else
139NORET_TYPE void 139unsigned long
140relocate_kernel(unsigned long indirection_page, 140relocate_kernel(unsigned long indirection_page,
141 unsigned long page_list, 141 unsigned long page_list,
142 unsigned long start_address) ATTRIB_NORET; 142 unsigned long start_address,
143 unsigned int preserve_context);
143#endif 144#endif
144 145
145#define ARCH_HAS_KIMAGE_ARCH 146#define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7cc5d3d01483..89cea4d44679 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -13,6 +13,7 @@
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15#include <linux/io.h> 15#include <linux/io.h>
16#include <linux/suspend.h>
16 17
17#include <asm/pgtable.h> 18#include <asm/pgtable.h>
18#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
@@ -270,19 +271,43 @@ void machine_kexec(struct kimage *image)
270{ 271{
271 unsigned long page_list[PAGES_NR]; 272 unsigned long page_list[PAGES_NR];
272 void *control_page; 273 void *control_page;
274 int save_ftrace_enabled;
273 275
274 tracer_disable(); 276#ifdef CONFIG_KEXEC_JUMP
277 if (kexec_image->preserve_context)
278 save_processor_state();
279#endif
280
281 save_ftrace_enabled = __ftrace_enabled_save();
275 282
276 /* Interrupts aren't acceptable while we reboot */ 283 /* Interrupts aren't acceptable while we reboot */
277 local_irq_disable(); 284 local_irq_disable();
278 285
286 if (image->preserve_context) {
287#ifdef CONFIG_X86_IO_APIC
288 /*
289 * We need to put APICs in legacy mode so that we can
290 * get timer interrupts in second kernel. kexec/kdump
291 * paths already have calls to disable_IO_APIC() in
292 * one form or other. kexec jump path also need
293 * one.
294 */
295 disable_IO_APIC();
296#endif
297 }
298
279 control_page = page_address(image->control_code_page) + PAGE_SIZE; 299 control_page = page_address(image->control_code_page) + PAGE_SIZE;
280 memcpy(control_page, relocate_kernel, PAGE_SIZE); 300 memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
281 301
282 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); 302 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
303 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
283 page_list[PA_TABLE_PAGE] = 304 page_list[PA_TABLE_PAGE] =
284 (unsigned long)__pa(page_address(image->control_code_page)); 305 (unsigned long)__pa(page_address(image->control_code_page));
285 306
307 if (image->type == KEXEC_TYPE_DEFAULT)
308 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
309 << PAGE_SHIFT);
310
286 /* 311 /*
287 * The segment registers are funny things, they have both a 312 * The segment registers are funny things, they have both a
288 * visible and an invisible part. Whenever the visible part is 313 * visible and an invisible part. Whenever the visible part is
@@ -302,8 +327,17 @@ void machine_kexec(struct kimage *image)
302 set_idt(phys_to_virt(0), 0); 327 set_idt(phys_to_virt(0), 0);
303 328
304 /* now call it */ 329 /* now call it */
305 relocate_kernel((unsigned long)image->head, (unsigned long)page_list, 330 image->start = relocate_kernel((unsigned long)image->head,
306 image->start); 331 (unsigned long)page_list,
332 image->start,
333 image->preserve_context);
334
335#ifdef CONFIG_KEXEC_JUMP
336 if (kexec_image->preserve_context)
337 restore_processor_state();
338#endif
339
340 __ftrace_enabled_restore(save_ftrace_enabled);
307} 341}
308 342
309void arch_crash_save_vmcoreinfo(void) 343void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index cfc0d24003dc..4de8f5b3d476 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -19,6 +19,24 @@
19#define PTR(x) (x << 3) 19#define PTR(x) (x << 3)
20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
21 21
22/*
23 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
24 * ~ control_page + PAGE_SIZE are used as data storage and stack for
25 * jumping back
26 */
27#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
28
29/* Minimal CPU state */
30#define RSP DATA(0x0)
31#define CR0 DATA(0x8)
32#define CR3 DATA(0x10)
33#define CR4 DATA(0x18)
34
35/* other data */
36#define CP_PA_TABLE_PAGE DATA(0x20)
37#define CP_PA_SWAP_PAGE DATA(0x28)
38#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
39
22 .text 40 .text
23 .align PAGE_SIZE 41 .align PAGE_SIZE
24 .code64 42 .code64
@@ -28,8 +46,27 @@ relocate_kernel:
28 * %rdi indirection_page 46 * %rdi indirection_page
29 * %rsi page_list 47 * %rsi page_list
30 * %rdx start address 48 * %rdx start address
49 * %rcx preserve_context
31 */ 50 */
32 51
52 /* Save the CPU context, used for jumping back */
53 pushq %rbx
54 pushq %rbp
55 pushq %r12
56 pushq %r13
57 pushq %r14
58 pushq %r15
59 pushf
60
61 movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
62 movq %rsp, RSP(%r11)
63 movq %cr0, %rax
64 movq %rax, CR0(%r11)
65 movq %cr3, %rax
66 movq %rax, CR3(%r11)
67 movq %cr4, %rax
68 movq %rax, CR4(%r11)
69
33 /* zero out flags, and disable interrupts */ 70 /* zero out flags, and disable interrupts */
34 pushq $0 71 pushq $0
35 popfq 72 popfq
@@ -41,10 +78,18 @@ relocate_kernel:
41 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 78 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
42 79
43 /* get physical address of page table now too */ 80 /* get physical address of page table now too */
44 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx 81 movq PTR(PA_TABLE_PAGE)(%rsi), %r9
82
83 /* get physical address of swap page now */
84 movq PTR(PA_SWAP_PAGE)(%rsi), %r10
85
86 /* save some information for jumping back */
87 movq %r9, CP_PA_TABLE_PAGE(%r11)
88 movq %r10, CP_PA_SWAP_PAGE(%r11)
89 movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
45 90
46 /* Switch to the identity mapped page tables */ 91 /* Switch to the identity mapped page tables */
47 movq %rcx, %cr3 92 movq %r9, %cr3
48 93
49 /* setup a new stack at the end of the physical control page */ 94 /* setup a new stack at the end of the physical control page */
50 lea PAGE_SIZE(%r8), %rsp 95 lea PAGE_SIZE(%r8), %rsp
@@ -83,9 +128,87 @@ identity_mapped:
831: 1281:
84 129
85 /* Flush the TLB (needed?) */ 130 /* Flush the TLB (needed?) */
86 movq %rcx, %cr3 131 movq %r9, %cr3
132
133 movq %rcx, %r11
134 call swap_pages
135
136 /*
137 * To be certain of avoiding problems with self-modifying code
138 * I need to execute a serializing instruction here.
139 * So I flush the TLB by reloading %cr3 here, it's handy,
140 * and not processor dependent.
141 */
142 movq %cr3, %rax
143 movq %rax, %cr3
144
145 /*
146 * set all of the registers to known values
147 * leave %rsp alone
148 */
149
150 testq %r11, %r11
151 jnz 1f
152 xorq %rax, %rax
153 xorq %rbx, %rbx
154 xorq %rcx, %rcx
155 xorq %rdx, %rdx
156 xorq %rsi, %rsi
157 xorq %rdi, %rdi
158 xorq %rbp, %rbp
159 xorq %r8, %r8
160 xorq %r9, %r9
161 xorq %r10, %r9
162 xorq %r11, %r11
163 xorq %r12, %r12
164 xorq %r13, %r13
165 xorq %r14, %r14
166 xorq %r15, %r15
167
168 ret
169
1701:
171 popq %rdx
172 leaq PAGE_SIZE(%r10), %rsp
173 call *%rdx
174
175 /* get the re-entry point of the peer system */
176 movq 0(%rsp), %rbp
177 call 1f
1781:
179 popq %r8
180 subq $(1b - relocate_kernel), %r8
181 movq CP_PA_SWAP_PAGE(%r8), %r10
182 movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
183 movq CP_PA_TABLE_PAGE(%r8), %rax
184 movq %rax, %cr3
185 lea PAGE_SIZE(%r8), %rsp
186 call swap_pages
187 movq $virtual_mapped, %rax
188 pushq %rax
189 ret
190
191virtual_mapped:
192 movq RSP(%r8), %rsp
193 movq CR4(%r8), %rax
194 movq %rax, %cr4
195 movq CR3(%r8), %rax
196 movq CR0(%r8), %r8
197 movq %rax, %cr3
198 movq %r8, %cr0
199 movq %rbp, %rax
200
201 popf
202 popq %r15
203 popq %r14
204 popq %r13
205 popq %r12
206 popq %rbp
207 popq %rbx
208 ret
87 209
88 /* Do the copies */ 210 /* Do the copies */
211swap_pages:
89 movq %rdi, %rcx /* Put the page_list in %rcx */ 212 movq %rdi, %rcx /* Put the page_list in %rcx */
90 xorq %rdi, %rdi 213 xorq %rdi, %rdi
91 xorq %rsi, %rsi 214 xorq %rsi, %rsi
@@ -117,39 +240,27 @@ identity_mapped:
117 movq %rcx, %rsi /* For ever source page do a copy */ 240 movq %rcx, %rsi /* For ever source page do a copy */
118 andq $0xfffffffffffff000, %rsi 241 andq $0xfffffffffffff000, %rsi
119 242
243 movq %rdi, %rdx
244 movq %rsi, %rax
245
246 movq %r10, %rdi
120 movq $512, %rcx 247 movq $512, %rcx
121 rep ; movsq 248 rep ; movsq
122 jmp 0b
1233:
124 249
125 /* 250 movq %rax, %rdi
126 * To be certain of avoiding problems with self-modifying code 251 movq %rdx, %rsi
127 * I need to execute a serializing instruction here. 252 movq $512, %rcx
128 * So I flush the TLB by reloading %cr3 here, it's handy, 253 rep ; movsq
129 * and not processor dependent.
130 */
131 movq %cr3, %rax
132 movq %rax, %cr3
133
134 /*
135 * set all of the registers to known values
136 * leave %rsp alone
137 */
138 254
139 xorq %rax, %rax 255 movq %rdx, %rdi
140 xorq %rbx, %rbx 256 movq %r10, %rsi
141 xorq %rcx, %rcx 257 movq $512, %rcx
142 xorq %rdx, %rdx 258 rep ; movsq
143 xorq %rsi, %rsi
144 xorq %rdi, %rdi
145 xorq %rbp, %rbp
146 xorq %r8, %r8
147 xorq %r9, %r9
148 xorq %r10, %r9
149 xorq %r11, %r11
150 xorq %r12, %r12
151 xorq %r13, %r13
152 xorq %r14, %r14
153 xorq %r15, %r15
154 259
260 lea PAGE_SIZE(%rax), %rsi
261 jmp 0b
2623:
155 ret 263 ret
264
265 .globl kexec_control_code_size
266.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fbfced6f6800..5bf54e40c6ef 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
275ASSERT((per_cpu__irq_stack_union == 0), 275ASSERT((per_cpu__irq_stack_union == 0),
276 "irq_stack_union is not at start of per-cpu area"); 276 "irq_stack_union is not at start of per-cpu area");
277#endif 277#endif
278
279#ifdef CONFIG_KEXEC
280#include <asm/kexec.h>
281
282ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
283 "kexec control code size is too big")
284#endif