author	Huang Ying <ying.huang@intel.com>	2009-03-09 22:57:16 -0400
committer	H. Peter Anvin <hpa@zytor.com>	2009-03-10 21:13:25 -0400
commit	fee7b0d84cc8c7bc5dc212901c79e93eaf83a5b5 (patch)
tree	f855b0b5057c3dff7e26c840218cb22bfe965a7a /arch/x86/kernel
parent	5359454701ce51a4626b1ef6eb7b16ec35bd458d (diff)
x86, kexec: x86_64: add kexec jump support for x86_64
Impact: new major feature

This patch adds kexec jump support for x86_64. More information about
kexec jump can be found in the corresponding x86_32 support patch.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
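Conceptually, kexec jump behaves like a setjmp/longjmp across kernels: machine_kexec() saves processor state, jumps into a peer kernel, and when the peer later jumps back, execution resumes after the original call site with the saved state restored. A minimal userspace analogy (illustration only, not kernel code):

#include <setjmp.h>
#include <stdio.h>

static jmp_buf saved_context;	/* stands in for save_processor_state() */

static void peer_system(void)
{
	printf("running in the peer system\n");
	longjmp(saved_context, 1);	/* the "jump back" */
}

int main(void)
{
	if (setjmp(saved_context) == 0) {
		printf("original system: jumping to peer\n");
		peer_system();	/* does not return normally */
	}
	printf("original system: context restored, execution continues\n");
	return 0;
}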
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/machine_kexec_64.c	42
-rw-r--r--	arch/x86/kernel/relocate_kernel_64.S	177
-rw-r--r--	arch/x86/kernel/vmlinux_64.lds.S	7
3 files changed, 189 insertions(+), 37 deletions(-)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7cc5d3d01483..89cea4d44679 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -13,6 +13,7 @@
 #include <linux/numa.h>
 #include <linux/ftrace.h>
 #include <linux/io.h>
+#include <linux/suspend.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -270,19 +271,43 @@ void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
+	int save_ftrace_enabled;
 
-	tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		save_processor_state();
+#endif
+
+	save_ftrace_enabled = __ftrace_enabled_save();
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
+	if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+		/*
+		 * We need to put the APICs in legacy mode so that we
+		 * can get timer interrupts in the second kernel. The
+		 * kexec/kdump paths already call disable_IO_APIC() in
+		 * one form or another; the kexec jump path needs to
+		 * do so as well.
+		 */
+		disable_IO_APIC();
+#endif
+	}
+
 	control_page = page_address(image->control_code_page) + PAGE_SIZE;
-	memcpy(control_page, relocate_kernel, PAGE_SIZE);
+	memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
 
 	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
+	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
 	page_list[PA_TABLE_PAGE] =
 		(unsigned long)__pa(page_address(image->control_code_page));
 
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+						<< PAGE_SHIFT);
+
 	/*
 	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
@@ -302,8 +327,17 @@ void machine_kexec(struct kimage *image)
 	set_idt(phys_to_virt(0), 0);
 
 	/* now call it */
-	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-			image->start);
+	image->start = relocate_kernel((unsigned long)image->head,
+				       (unsigned long)page_list,
+				       image->start,
+				       image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		restore_processor_state();
+#endif
+
+	__ftrace_enabled_restore(save_ftrace_enabled);
 }
 
 void arch_crash_save_vmcoreinfo(void)
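The page_list[] filled in above is effectively a fixed-slot parameter block for the relocation assembly: once the kernel mapping is gone, the control code can only find data at fixed offsets, so every address it will need (control page, page tables, swap page) is passed in one flat array. A simplified userspace sketch of the pattern, with the slot names redeclared here for illustration (the real enum lives in asm/kexec.h):

#include <stdio.h>

/* hypothetical stand-ins for the kernel's page_list slots */
enum {
	PA_CONTROL_PAGE,	/* physical address of the control page */
	VA_CONTROL_PAGE,	/* its virtual address, kept for jumping back */
	PA_TABLE_PAGE,		/* identity-mapped page tables */
	PA_SWAP_PAGE,		/* scratch page for exchanging pages */
	PAGES_NR
};

/* consumer reads slots by fixed index, as relocate_kernel does via PTR() */
static void show(const unsigned long page_list[PAGES_NR])
{
	printf("table page at %#lx, swap page at %#lx\n",
	       page_list[PA_TABLE_PAGE], page_list[PA_SWAP_PAGE]);
}

int main(void)
{
	unsigned long page_list[PAGES_NR] = {	/* illustrative addresses */
		[PA_CONTROL_PAGE] = 0x100000UL,
		[VA_CONTROL_PAGE] = 0x7f0000100000UL,
		[PA_TABLE_PAGE]   = 0x101000UL,
		[PA_SWAP_PAGE]    = 0x102000UL,
	};
	show(page_list);
	return 0;
}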
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index cfc0d24003dc..4de8f5b3d476 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -19,6 +19,24 @@
 #define PTR(x) (x << 3)
 #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
+ */
+#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define RSP			DATA(0x0)
+#define CR0			DATA(0x8)
+#define CR3			DATA(0x10)
+#define CR4			DATA(0x18)
+
+/* other data */
+#define CP_PA_TABLE_PAGE	DATA(0x20)
+#define CP_PA_SWAP_PAGE		DATA(0x28)
+#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
+
 	.text
 	.align PAGE_SIZE
 	.code64
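Laid out as a C struct, the control page implied by these DATA() offsets looks roughly like the sketch below. This is a hypothetical view, for illustration only: the assembly addresses the fields by raw offset, and the KEXEC_CONTROL_CODE_MAX_SIZE value is assumed from asm/kexec.h of this era.

#include <stddef.h>
#include <stdint.h>

#define KEXEC_CONTROL_CODE_MAX_SIZE 2048	/* assumed value, see asm/kexec.h */

struct control_page_layout {
	uint8_t  code[KEXEC_CONTROL_CODE_MAX_SIZE];	/* relocate_kernel copy */
	/* minimal CPU state, saved for jumping back */
	uint64_t rsp;			/* RSP = DATA(0x0)  */
	uint64_t cr0;			/* CR0 = DATA(0x8)  */
	uint64_t cr3;			/* CR3 = DATA(0x10) */
	uint64_t cr4;			/* CR4 = DATA(0x18) */
	/* other data */
	uint64_t pa_table_page;		/* CP_PA_TABLE_PAGE       = DATA(0x20) */
	uint64_t pa_swap_page;		/* CP_PA_SWAP_PAGE        = DATA(0x28) */
	uint64_t pa_backup_pages_map;	/* CP_PA_BACKUP_PAGES_MAP = DATA(0x30) */
	/* the rest of the page, up to PAGE_SIZE, serves as the stack */
};

/* check that the struct view matches the raw offsets used by the assembly */
_Static_assert(offsetof(struct control_page_layout, cr3) ==
	       KEXEC_CONTROL_CODE_MAX_SIZE + 0x10,
	       "struct view disagrees with DATA() offsets");

int main(void) { return 0; }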
@@ -28,8 +46,27 @@ relocate_kernel:
 	 * %rdi indirection_page
 	 * %rsi page_list
 	 * %rdx start address
+	 * %rcx preserve_context
 	 */
 
+	/* Save the CPU context, used for jumping back */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushf
+
+	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
+	movq	%rsp, RSP(%r11)
+	movq	%cr0, %rax
+	movq	%rax, CR0(%r11)
+	movq	%cr3, %rax
+	movq	%rax, CR3(%r11)
+	movq	%cr4, %rax
+	movq	%rax, CR4(%r11)
+
 	/* zero out flags, and disable interrupts */
 	pushq	$0
 	popfq
@@ -41,10 +78,18 @@ relocate_kernel:
 	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
 
 	/* get physical address of page table now too */
-	movq	PTR(PA_TABLE_PAGE)(%rsi), %rcx
+	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
+
+	/* get physical address of swap page now */
+	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
+
+	/* save some information for jumping back */
+	movq	%r9, CP_PA_TABLE_PAGE(%r11)
+	movq	%r10, CP_PA_SWAP_PAGE(%r11)
+	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
 
 	/* Switch to the identity mapped page tables */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
 
 	/* setup a new stack at the end of the physical control page */
 	lea	PAGE_SIZE(%r8), %rsp
@@ -83,9 +128,87 @@ identity_mapped:
 1:
 
 	/* Flush the TLB (needed?) */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
+
+	movq	%rcx, %r11
+	call	swap_pages
+
+	/*
+	 * To be certain of avoiding problems with self-modifying code
+	 * I need to execute a serializing instruction here.
+	 * So I flush the TLB by reloading %cr3 here, it's handy,
+	 * and not processor dependent.
+	 */
+	movq	%cr3, %rax
+	movq	%rax, %cr3
+
+	/*
+	 * set all of the registers to known values
+	 * leave %rsp alone
+	 */
+
+	testq	%r11, %r11
+	jnz	1f
+	xorq	%rax, %rax
+	xorq	%rbx, %rbx
+	xorq	%rcx, %rcx
+	xorq	%rdx, %rdx
+	xorq	%rsi, %rsi
+	xorq	%rdi, %rdi
+	xorq	%rbp, %rbp
+	xorq	%r8, %r8
+	xorq	%r9, %r9
+	xorq	%r10, %r10
+	xorq	%r11, %r11
+	xorq	%r12, %r12
+	xorq	%r13, %r13
+	xorq	%r14, %r14
+	xorq	%r15, %r15
+
+	ret
+
+1:
+	popq	%rdx
+	leaq	PAGE_SIZE(%r10), %rsp
+	call	*%rdx
+
+	/* get the re-entry point of the peer system */
+	movq	0(%rsp), %rbp
+	call	1f
+1:
+	popq	%r8
+	subq	$(1b - relocate_kernel), %r8
+	movq	CP_PA_SWAP_PAGE(%r8), %r10
+	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
+	movq	CP_PA_TABLE_PAGE(%r8), %rax
+	movq	%rax, %cr3
+	lea	PAGE_SIZE(%r8), %rsp
+	call	swap_pages
+	movq	$virtual_mapped, %rax
+	pushq	%rax
+	ret
+
+virtual_mapped:
+	movq	RSP(%r8), %rsp
+	movq	CR4(%r8), %rax
+	movq	%rax, %cr4
+	movq	CR3(%r8), %rax
+	movq	CR0(%r8), %r8
+	movq	%rax, %cr3
+	movq	%r8, %cr0
+	movq	%rbp, %rax
+
+	popf
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbp
+	popq	%rbx
+	ret
 
 	/* Do the copies */
+swap_pages:
 	movq	%rdi, %rcx	/* Put the page_list in %rcx */
 	xorq	%rdi, %rdi
 	xorq	%rsi, %rsi
@@ -117,39 +240,27 @@ identity_mapped:
 	movq	%rcx, %rsi	/* For every source page do a copy */
 	andq	$0xfffffffffffff000, %rsi
 
+	movq	%rdi, %rdx
+	movq	%rsi, %rax
+
+	movq	%r10, %rdi
 	movq	$512, %rcx
 	rep ; movsq
-	jmp	0b
-3:
 
-	/*
-	 * To be certain of avoiding problems with self-modifying code
-	 * I need to execute a serializing instruction here.
-	 * So I flush the TLB by reloading %cr3 here, it's handy,
-	 * and not processor dependent.
-	 */
-	movq	%cr3, %rax
-	movq	%rax, %cr3
+	movq	%rax, %rdi
+	movq	%rdx, %rsi
+	movq	$512, %rcx
+	rep ; movsq
 
-	/*
-	 * set all of the registers to known values
-	 * leave %rsp alone
-	 */
-
-	xorq	%rax, %rax
-	xorq	%rbx, %rbx
-	xorq	%rcx, %rcx
-	xorq	%rdx, %rdx
-	xorq	%rsi, %rsi
-	xorq	%rdi, %rdi
-	xorq	%rbp, %rbp
-	xorq	%r8, %r8
-	xorq	%r9, %r9
-	xorq	%r10, %r10
-	xorq	%r11, %r11
-	xorq	%r12, %r12
-	xorq	%r13, %r13
-	xorq	%r14, %r14
-	xorq	%r15, %r15
+	movq	%rdx, %rdi
+	movq	%r10, %rsi
+	movq	$512, %rcx
+	rep ; movsq
 
+	lea	PAGE_SIZE(%rax), %rsi
+	jmp	0b
+3:
 	ret
+
+	.globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
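The rewritten copy loop no longer simply copies: each destination page is exchanged with its source through the swap page in three rep movsq runs (source to swap, destination to source, swap to destination), so the original kernel's memory survives in the backup pages map. A userspace model of that exchange step (illustration only, not the kernel code):

#include <assert.h>
#include <string.h>

#define PAGE_SIZE 4096

/*
 * Exchange one page of contents via a scratch page, mirroring the
 * three "rep ; movsq" runs in swap_pages.
 */
static void exchange_page(unsigned char *dest, unsigned char *source,
			  unsigned char *swap)
{
	memcpy(swap, source, PAGE_SIZE);	/* swap   <- source */
	memcpy(source, dest, PAGE_SIZE);	/* source <- dest   */
	memcpy(dest, swap, PAGE_SIZE);		/* dest   <- swap   */
}

int main(void)
{
	static unsigned char dest[PAGE_SIZE], source[PAGE_SIZE],
			     swap[PAGE_SIZE];

	memset(dest, 0xaa, PAGE_SIZE);
	memset(source, 0xbb, PAGE_SIZE);
	exchange_page(dest, source, swap);
	assert(dest[0] == 0xbb && source[0] == 0xaa);	/* contents traded */
	return 0;
}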
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fbfced6f6800..5bf54e40c6ef 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
 ASSERT((per_cpu__irq_stack_union == 0),
         "irq_stack_union is not at start of per-cpu area");
 #endif
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+	"kexec control code size is too big")
+#endif
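The check has to live in the linker script because kexec_control_code_size exists only as a linker symbol. In plain C the same guard would be a compile-time assertion; a sketch of the equivalent pattern, with illustrative values that are not the kernel's:

#include <stdint.h>

#define CONTROL_CODE_MAX_SIZE 2048	/* reserved space, assumed value */

/* stand-in for the relocate_kernel blob whose size must stay bounded */
static const uint8_t control_code[1700] = { 0x90 };	/* hypothetical size */

/* C11 static assertion; the linker-script ASSERT does this at link time */
_Static_assert(sizeof(control_code) <= CONTROL_CODE_MAX_SIZE,
	       "kexec control code size is too big");

int main(void) { return 0; }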