diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/Kconfig | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/kexec.h | 13 | ||||
-rw-r--r-- | arch/x86/kernel/machine_kexec_64.c | 42 | ||||
-rw-r--r-- | arch/x86/kernel/relocate_kernel_64.S | 177 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux_64.lds.S | 7 |
5 files changed, 197 insertions, 44 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 31758378bcd2..87717f3687d2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1431,7 +1431,7 @@ config CRASH_DUMP | |||
1431 | config KEXEC_JUMP | 1431 | config KEXEC_JUMP |
1432 | bool "kexec jump (EXPERIMENTAL)" | 1432 | bool "kexec jump (EXPERIMENTAL)" |
1433 | depends on EXPERIMENTAL | 1433 | depends on EXPERIMENTAL |
1434 | depends on KEXEC && HIBERNATION && X86_32 | 1434 | depends on KEXEC && HIBERNATION |
1435 | ---help--- | 1435 | ---help--- |
1436 | Jump between original kernel and kexeced kernel and invoke | 1436 | Jump between original kernel and kexeced kernel and invoke |
1437 | code in physical address mode via KEXEC | 1437 | code in physical address mode via KEXEC |
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 0ceb6d19ed30..317ff1703d0b 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h | |||
@@ -9,13 +9,13 @@ | |||
9 | # define PAGES_NR 4 | 9 | # define PAGES_NR 4 |
10 | #else | 10 | #else |
11 | # define PA_CONTROL_PAGE 0 | 11 | # define PA_CONTROL_PAGE 0 |
12 | # define PA_TABLE_PAGE 1 | 12 | # define VA_CONTROL_PAGE 1 |
13 | # define PAGES_NR 2 | 13 | # define PA_TABLE_PAGE 2 |
14 | # define PA_SWAP_PAGE 3 | ||
15 | # define PAGES_NR 4 | ||
14 | #endif | 16 | #endif |
15 | 17 | ||
16 | #ifdef CONFIG_X86_32 | ||
17 | # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 | 18 | # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 |
18 | #endif | ||
19 | 19 | ||
20 | #ifndef __ASSEMBLY__ | 20 | #ifndef __ASSEMBLY__ |
21 | 21 | ||
@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page, | |||
136 | unsigned int has_pae, | 136 | unsigned int has_pae, |
137 | unsigned int preserve_context); | 137 | unsigned int preserve_context); |
138 | #else | 138 | #else |
139 | NORET_TYPE void | 139 | unsigned long |
140 | relocate_kernel(unsigned long indirection_page, | 140 | relocate_kernel(unsigned long indirection_page, |
141 | unsigned long page_list, | 141 | unsigned long page_list, |
142 | unsigned long start_address) ATTRIB_NORET; | 142 | unsigned long start_address, |
143 | unsigned int preserve_context); | ||
143 | #endif | 144 | #endif |
144 | 145 | ||
145 | #define ARCH_HAS_KIMAGE_ARCH | 146 | #define ARCH_HAS_KIMAGE_ARCH |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 7cc5d3d01483..89cea4d44679 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
15 | #include <linux/io.h> | 15 | #include <linux/io.h> |
16 | #include <linux/suspend.h> | ||
16 | 17 | ||
17 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
18 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
@@ -270,19 +271,43 @@ void machine_kexec(struct kimage *image) | |||
270 | { | 271 | { |
271 | unsigned long page_list[PAGES_NR]; | 272 | unsigned long page_list[PAGES_NR]; |
272 | void *control_page; | 273 | void *control_page; |
274 | int save_ftrace_enabled; | ||
273 | 275 | ||
274 | tracer_disable(); | 276 | #ifdef CONFIG_KEXEC_JUMP |
277 | if (kexec_image->preserve_context) | ||
278 | save_processor_state(); | ||
279 | #endif | ||
280 | |||
281 | save_ftrace_enabled = __ftrace_enabled_save(); | ||
275 | 282 | ||
276 | /* Interrupts aren't acceptable while we reboot */ | 283 | /* Interrupts aren't acceptable while we reboot */ |
277 | local_irq_disable(); | 284 | local_irq_disable(); |
278 | 285 | ||
286 | if (image->preserve_context) { | ||
287 | #ifdef CONFIG_X86_IO_APIC | ||
288 | /* | ||
289 | * We need to put APICs in legacy mode so that we can | ||
290 | * get timer interrupts in second kernel. kexec/kdump | ||
291 | * paths already have calls to disable_IO_APIC() in | ||
292 | * one form or other. The kexec jump path also needs | ||
293 | * one. | ||
294 | */ | ||
295 | disable_IO_APIC(); | ||
296 | #endif | ||
297 | } | ||
298 | |||
279 | control_page = page_address(image->control_code_page) + PAGE_SIZE; | 299 | control_page = page_address(image->control_code_page) + PAGE_SIZE; |
280 | memcpy(control_page, relocate_kernel, PAGE_SIZE); | 300 | memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); |
281 | 301 | ||
282 | page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); | 302 | page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); |
303 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; | ||
283 | page_list[PA_TABLE_PAGE] = | 304 | page_list[PA_TABLE_PAGE] = |
284 | (unsigned long)__pa(page_address(image->control_code_page)); | 305 | (unsigned long)__pa(page_address(image->control_code_page)); |
285 | 306 | ||
307 | if (image->type == KEXEC_TYPE_DEFAULT) | ||
308 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) | ||
309 | << PAGE_SHIFT); | ||
310 | |||
286 | /* | 311 | /* |
287 | * The segment registers are funny things, they have both a | 312 | * The segment registers are funny things, they have both a |
288 | * visible and an invisible part. Whenever the visible part is | 313 | * visible and an invisible part. Whenever the visible part is |
@@ -302,8 +327,17 @@ void machine_kexec(struct kimage *image) | |||
302 | set_idt(phys_to_virt(0), 0); | 327 | set_idt(phys_to_virt(0), 0); |
303 | 328 | ||
304 | /* now call it */ | 329 | /* now call it */ |
305 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, | 330 | image->start = relocate_kernel((unsigned long)image->head, |
306 | image->start); | 331 | (unsigned long)page_list, |
332 | image->start, | ||
333 | image->preserve_context); | ||
334 | |||
335 | #ifdef CONFIG_KEXEC_JUMP | ||
336 | if (kexec_image->preserve_context) | ||
337 | restore_processor_state(); | ||
338 | #endif | ||
339 | |||
340 | __ftrace_enabled_restore(save_ftrace_enabled); | ||
307 | } | 341 | } |
308 | 342 | ||
309 | void arch_crash_save_vmcoreinfo(void) | 343 | void arch_crash_save_vmcoreinfo(void) |
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index cfc0d24003dc..4de8f5b3d476 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S | |||
@@ -19,6 +19,24 @@ | |||
19 | #define PTR(x) (x << 3) | 19 | #define PTR(x) (x << 3) |
20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
21 | 21 | ||
22 | /* | ||
23 | * control_page + KEXEC_CONTROL_CODE_MAX_SIZE | ||
24 | * ~ control_page + PAGE_SIZE are used as data storage and stack for | ||
25 | * jumping back | ||
26 | */ | ||
27 | #define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) | ||
28 | |||
29 | /* Minimal CPU state */ | ||
30 | #define RSP DATA(0x0) | ||
31 | #define CR0 DATA(0x8) | ||
32 | #define CR3 DATA(0x10) | ||
33 | #define CR4 DATA(0x18) | ||
34 | |||
35 | /* other data */ | ||
36 | #define CP_PA_TABLE_PAGE DATA(0x20) | ||
37 | #define CP_PA_SWAP_PAGE DATA(0x28) | ||
38 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x30) | ||
39 | |||
22 | .text | 40 | .text |
23 | .align PAGE_SIZE | 41 | .align PAGE_SIZE |
24 | .code64 | 42 | .code64 |
@@ -28,8 +46,27 @@ relocate_kernel: | |||
28 | * %rdi indirection_page | 46 | * %rdi indirection_page |
29 | * %rsi page_list | 47 | * %rsi page_list |
30 | * %rdx start address | 48 | * %rdx start address |
49 | * %rcx preserve_context | ||
31 | */ | 50 | */ |
32 | 51 | ||
52 | /* Save the CPU context, used for jumping back */ | ||
53 | pushq %rbx | ||
54 | pushq %rbp | ||
55 | pushq %r12 | ||
56 | pushq %r13 | ||
57 | pushq %r14 | ||
58 | pushq %r15 | ||
59 | pushf | ||
60 | |||
61 | movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 | ||
62 | movq %rsp, RSP(%r11) | ||
63 | movq %cr0, %rax | ||
64 | movq %rax, CR0(%r11) | ||
65 | movq %cr3, %rax | ||
66 | movq %rax, CR3(%r11) | ||
67 | movq %cr4, %rax | ||
68 | movq %rax, CR4(%r11) | ||
69 | |||
33 | /* zero out flags, and disable interrupts */ | 70 | /* zero out flags, and disable interrupts */ |
34 | pushq $0 | 71 | pushq $0 |
35 | popfq | 72 | popfq |
@@ -41,10 +78,18 @@ relocate_kernel: | |||
41 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 | 78 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 |
42 | 79 | ||
43 | /* get physical address of page table now too */ | 80 | /* get physical address of page table now too */ |
44 | movq PTR(PA_TABLE_PAGE)(%rsi), %rcx | 81 | movq PTR(PA_TABLE_PAGE)(%rsi), %r9 |
82 | |||
83 | /* get physical address of swap page now */ | ||
84 | movq PTR(PA_SWAP_PAGE)(%rsi), %r10 | ||
85 | |||
86 | /* save some information for jumping back */ | ||
87 | movq %r9, CP_PA_TABLE_PAGE(%r11) | ||
88 | movq %r10, CP_PA_SWAP_PAGE(%r11) | ||
89 | movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11) | ||
45 | 90 | ||
46 | /* Switch to the identity mapped page tables */ | 91 | /* Switch to the identity mapped page tables */ |
47 | movq %rcx, %cr3 | 92 | movq %r9, %cr3 |
48 | 93 | ||
49 | /* setup a new stack at the end of the physical control page */ | 94 | /* setup a new stack at the end of the physical control page */ |
50 | lea PAGE_SIZE(%r8), %rsp | 95 | lea PAGE_SIZE(%r8), %rsp |
@@ -83,9 +128,87 @@ identity_mapped: | |||
83 | 1: | 128 | 1: |
84 | 129 | ||
85 | /* Flush the TLB (needed?) */ | 130 | /* Flush the TLB (needed?) */ |
86 | movq %rcx, %cr3 | 131 | movq %r9, %cr3 |
132 | |||
133 | movq %rcx, %r11 | ||
134 | call swap_pages | ||
135 | |||
136 | /* | ||
137 | * To be certain of avoiding problems with self-modifying code | ||
138 | * I need to execute a serializing instruction here. | ||
139 | * So I flush the TLB by reloading %cr3 here, it's handy, | ||
140 | * and not processor dependent. | ||
141 | */ | ||
142 | movq %cr3, %rax | ||
143 | movq %rax, %cr3 | ||
144 | |||
145 | /* | ||
146 | * set all of the registers to known values | ||
147 | * leave %rsp alone | ||
148 | */ | ||
149 | |||
150 | testq %r11, %r11 | ||
151 | jnz 1f | ||
152 | xorq %rax, %rax | ||
153 | xorq %rbx, %rbx | ||
154 | xorq %rcx, %rcx | ||
155 | xorq %rdx, %rdx | ||
156 | xorq %rsi, %rsi | ||
157 | xorq %rdi, %rdi | ||
158 | xorq %rbp, %rbp | ||
159 | xorq %r8, %r8 | ||
160 | xorq %r9, %r9 | ||
161 | xorq %r10, %r9 | ||
162 | xorq %r11, %r11 | ||
163 | xorq %r12, %r12 | ||
164 | xorq %r13, %r13 | ||
165 | xorq %r14, %r14 | ||
166 | xorq %r15, %r15 | ||
167 | |||
168 | ret | ||
169 | |||
170 | 1: | ||
171 | popq %rdx | ||
172 | leaq PAGE_SIZE(%r10), %rsp | ||
173 | call *%rdx | ||
174 | |||
175 | /* get the re-entry point of the peer system */ | ||
176 | movq 0(%rsp), %rbp | ||
177 | call 1f | ||
178 | 1: | ||
179 | popq %r8 | ||
180 | subq $(1b - relocate_kernel), %r8 | ||
181 | movq CP_PA_SWAP_PAGE(%r8), %r10 | ||
182 | movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi | ||
183 | movq CP_PA_TABLE_PAGE(%r8), %rax | ||
184 | movq %rax, %cr3 | ||
185 | lea PAGE_SIZE(%r8), %rsp | ||
186 | call swap_pages | ||
187 | movq $virtual_mapped, %rax | ||
188 | pushq %rax | ||
189 | ret | ||
190 | |||
191 | virtual_mapped: | ||
192 | movq RSP(%r8), %rsp | ||
193 | movq CR4(%r8), %rax | ||
194 | movq %rax, %cr4 | ||
195 | movq CR3(%r8), %rax | ||
196 | movq CR0(%r8), %r8 | ||
197 | movq %rax, %cr3 | ||
198 | movq %r8, %cr0 | ||
199 | movq %rbp, %rax | ||
200 | |||
201 | popf | ||
202 | popq %r15 | ||
203 | popq %r14 | ||
204 | popq %r13 | ||
205 | popq %r12 | ||
206 | popq %rbp | ||
207 | popq %rbx | ||
208 | ret | ||
87 | 209 | ||
88 | /* Do the copies */ | 210 | /* Do the copies */ |
211 | swap_pages: | ||
89 | movq %rdi, %rcx /* Put the page_list in %rcx */ | 212 | movq %rdi, %rcx /* Put the page_list in %rcx */ |
90 | xorq %rdi, %rdi | 213 | xorq %rdi, %rdi |
91 | xorq %rsi, %rsi | 214 | xorq %rsi, %rsi |
@@ -117,39 +240,27 @@ identity_mapped: | |||
117 | movq %rcx, %rsi /* For every source page do a copy */ | 240 | movq %rcx, %rsi /* For every source page do a copy */ |
118 | andq $0xfffffffffffff000, %rsi | 241 | andq $0xfffffffffffff000, %rsi |
119 | 242 | ||
243 | movq %rdi, %rdx | ||
244 | movq %rsi, %rax | ||
245 | |||
246 | movq %r10, %rdi | ||
120 | movq $512, %rcx | 247 | movq $512, %rcx |
121 | rep ; movsq | 248 | rep ; movsq |
122 | jmp 0b | ||
123 | 3: | ||
124 | 249 | ||
125 | /* | 250 | movq %rax, %rdi |
126 | * To be certain of avoiding problems with self-modifying code | 251 | movq %rdx, %rsi |
127 | * I need to execute a serializing instruction here. | 252 | movq $512, %rcx |
128 | * So I flush the TLB by reloading %cr3 here, it's handy, | 253 | rep ; movsq |
129 | * and not processor dependent. | ||
130 | */ | ||
131 | movq %cr3, %rax | ||
132 | movq %rax, %cr3 | ||
133 | |||
134 | /* | ||
135 | * set all of the registers to known values | ||
136 | * leave %rsp alone | ||
137 | */ | ||
138 | 254 | ||
139 | xorq %rax, %rax | 255 | movq %rdx, %rdi |
140 | xorq %rbx, %rbx | 256 | movq %r10, %rsi |
141 | xorq %rcx, %rcx | 257 | movq $512, %rcx |
142 | xorq %rdx, %rdx | 258 | rep ; movsq |
143 | xorq %rsi, %rsi | ||
144 | xorq %rdi, %rdi | ||
145 | xorq %rbp, %rbp | ||
146 | xorq %r8, %r8 | ||
147 | xorq %r9, %r9 | ||
148 | xorq %r10, %r9 | ||
149 | xorq %r11, %r11 | ||
150 | xorq %r12, %r12 | ||
151 | xorq %r13, %r13 | ||
152 | xorq %r14, %r14 | ||
153 | xorq %r15, %r15 | ||
154 | 259 | ||
260 | lea PAGE_SIZE(%rax), %rsi | ||
261 | jmp 0b | ||
262 | 3: | ||
155 | ret | 263 | ret |
264 | |||
265 | .globl kexec_control_code_size | ||
266 | .set kexec_control_code_size, . - relocate_kernel | ||
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fbfced6f6800..5bf54e40c6ef 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), | |||
275 | ASSERT((per_cpu__irq_stack_union == 0), | 275 | ASSERT((per_cpu__irq_stack_union == 0), |
276 | "irq_stack_union is not at start of per-cpu area"); | 276 | "irq_stack_union is not at start of per-cpu area"); |
277 | #endif | 277 | #endif |
278 | |||
279 | #ifdef CONFIG_KEXEC | ||
280 | #include <asm/kexec.h> | ||
281 | |||
282 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | ||
283 | "kexec control code size is too big") | ||
284 | #endif | ||