aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/machine_kexec_64.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/machine_kexec_64.c')
-rw-r--r--arch/x86/kernel/machine_kexec_64.c179
1 files changed, 142 insertions, 37 deletions
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index c43caa3a91f3..89cea4d44679 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -12,20 +12,47 @@
12#include <linux/reboot.h> 12#include <linux/reboot.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15#include <linux/io.h>
16#include <linux/suspend.h>
15 17
16#include <asm/pgtable.h> 18#include <asm/pgtable.h>
17#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
18#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
19#include <asm/io.h>
20 21
21#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) 22static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
22static u64 kexec_pgd[512] PAGE_ALIGNED; 23 unsigned long addr)
23static u64 kexec_pud0[512] PAGE_ALIGNED; 24{
24static u64 kexec_pmd0[512] PAGE_ALIGNED; 25 pud_t *pud;
25static u64 kexec_pte0[512] PAGE_ALIGNED; 26 pmd_t *pmd;
26static u64 kexec_pud1[512] PAGE_ALIGNED; 27 struct page *page;
27static u64 kexec_pmd1[512] PAGE_ALIGNED; 28 int result = -ENOMEM;
28static u64 kexec_pte1[512] PAGE_ALIGNED; 29
30 addr &= PMD_MASK;
31 pgd += pgd_index(addr);
32 if (!pgd_present(*pgd)) {
33 page = kimage_alloc_control_pages(image, 0);
34 if (!page)
35 goto out;
36 pud = (pud_t *)page_address(page);
37 memset(pud, 0, PAGE_SIZE);
38 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
39 }
40 pud = pud_offset(pgd, addr);
41 if (!pud_present(*pud)) {
42 page = kimage_alloc_control_pages(image, 0);
43 if (!page)
44 goto out;
45 pmd = (pmd_t *)page_address(page);
46 memset(pmd, 0, PAGE_SIZE);
47 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
48 }
49 pmd = pmd_offset(pud, addr);
50 if (!pmd_present(*pmd))
51 set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
52 result = 0;
53out:
54 return result;
55}
29 56
30static void init_level2_page(pmd_t *level2p, unsigned long addr) 57static void init_level2_page(pmd_t *level2p, unsigned long addr)
31{ 58{
@@ -92,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
92 } 119 }
93 level3p = (pud_t *)page_address(page); 120 level3p = (pud_t *)page_address(page);
94 result = init_level3_page(image, level3p, addr, last_addr); 121 result = init_level3_page(image, level3p, addr, last_addr);
95 if (result) { 122 if (result)
96 goto out; 123 goto out;
97 }
98 set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); 124 set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
99 addr += PGDIR_SIZE; 125 addr += PGDIR_SIZE;
100 } 126 }
@@ -107,12 +133,72 @@ out:
107 return result; 133 return result;
108} 134}
109 135
136static void free_transition_pgtable(struct kimage *image)
137{
138 free_page((unsigned long)image->arch.pud);
139 free_page((unsigned long)image->arch.pmd);
140 free_page((unsigned long)image->arch.pte);
141}
142
143static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
144{
145 pud_t *pud;
146 pmd_t *pmd;
147 pte_t *pte;
148 unsigned long vaddr, paddr;
149 int result = -ENOMEM;
150
151 vaddr = (unsigned long)relocate_kernel;
152 paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
153 pgd += pgd_index(vaddr);
154 if (!pgd_present(*pgd)) {
155 pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
156 if (!pud)
157 goto err;
158 image->arch.pud = pud;
159 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
160 }
161 pud = pud_offset(pgd, vaddr);
162 if (!pud_present(*pud)) {
163 pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
164 if (!pmd)
165 goto err;
166 image->arch.pmd = pmd;
167 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
168 }
169 pmd = pmd_offset(pud, vaddr);
170 if (!pmd_present(*pmd)) {
171 pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
172 if (!pte)
173 goto err;
174 image->arch.pte = pte;
175 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
176 }
177 pte = pte_offset_kernel(pmd, vaddr);
178 set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
179 return 0;
180err:
181 free_transition_pgtable(image);
182 return result;
183}
184
110 185
111static int init_pgtable(struct kimage *image, unsigned long start_pgtable) 186static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
112{ 187{
113 pgd_t *level4p; 188 pgd_t *level4p;
189 int result;
114 level4p = (pgd_t *)__va(start_pgtable); 190 level4p = (pgd_t *)__va(start_pgtable);
115 return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); 191 result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
192 if (result)
193 return result;
194 /*
195 * image->start may be outside 0 ~ max_pfn, for example when
196 * jump back to original kernel from kexeced kernel
197 */
198 result = init_one_level2_page(image, level4p, image->start);
199 if (result)
200 return result;
201 return init_transition_pgtable(image, level4p);
116} 202}
117 203
118static void set_idt(void *newidt, u16 limit) 204static void set_idt(void *newidt, u16 limit)
@@ -174,7 +260,7 @@ int machine_kexec_prepare(struct kimage *image)
174 260
/*
 * Cleanup counterpart for a kexec image: hands the transition page-table
 * pages recorded in image->arch back via free_transition_pgtable().
 */
void machine_kexec_cleanup(struct kimage *image)
{
	free_transition_pgtable(image);
}
179 265
180/* 266/*
@@ -185,36 +271,45 @@ void machine_kexec(struct kimage *image)
185{ 271{
186 unsigned long page_list[PAGES_NR]; 272 unsigned long page_list[PAGES_NR];
187 void *control_page; 273 void *control_page;
274 int save_ftrace_enabled;
188 275
189 tracer_disable(); 276#ifdef CONFIG_KEXEC_JUMP
277 if (kexec_image->preserve_context)
278 save_processor_state();
279#endif
280
281 save_ftrace_enabled = __ftrace_enabled_save();
190 282
191 /* Interrupts aren't acceptable while we reboot */ 283 /* Interrupts aren't acceptable while we reboot */
192 local_irq_disable(); 284 local_irq_disable();
193 285
286 if (image->preserve_context) {
287#ifdef CONFIG_X86_IO_APIC
288 /*
289 * We need to put APICs in legacy mode so that we can
290 * get timer interrupts in second kernel. kexec/kdump
291 * paths already have calls to disable_IO_APIC() in
292 * one form or other. The kexec jump path also needs
293 * one.
294 */
295 disable_IO_APIC();
296#endif
297 }
298
194 control_page = page_address(image->control_code_page) + PAGE_SIZE; 299 control_page = page_address(image->control_code_page) + PAGE_SIZE;
195 memcpy(control_page, relocate_kernel, PAGE_SIZE); 300 memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
196 301
197 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); 302 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
198 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; 303 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
199 page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
200 page_list[VA_PGD] = (unsigned long)kexec_pgd;
201 page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
202 page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
203 page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
204 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
205 page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
206 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
207 page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
208 page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
209 page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
210 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
211 page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
212 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
213
214 page_list[PA_TABLE_PAGE] = 304 page_list[PA_TABLE_PAGE] =
215 (unsigned long)__pa(page_address(image->control_code_page)); 305 (unsigned long)__pa(page_address(image->control_code_page));
216 306
217 /* The segment registers are funny things, they have both a 307 if (image->type == KEXEC_TYPE_DEFAULT)
308 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
309 << PAGE_SHIFT);
310
311 /*
312 * The segment registers are funny things, they have both a
218 * visible and an invisible part. Whenever the visible part is 313 * visible and an invisible part. Whenever the visible part is
219 * set to a specific selector, the invisible part is loaded 314 * set to a specific selector, the invisible part is loaded
220 * with from a table in memory. At no other time is the 315 * with from a table in memory. At no other time is the
@@ -224,15 +319,25 @@ void machine_kexec(struct kimage *image)
224 * segments, before I zap the gdt with an invalid value. 319 * segments, before I zap the gdt with an invalid value.
225 */ 320 */
226 load_segments(); 321 load_segments();
227 /* The gdt & idt are now invalid. 322 /*
323 * The gdt & idt are now invalid.
228 * If you want to load them you must set up your own idt & gdt. 324 * If you want to load them you must set up your own idt & gdt.
229 */ 325 */
230 set_gdt(phys_to_virt(0),0); 326 set_gdt(phys_to_virt(0), 0);
231 set_idt(phys_to_virt(0),0); 327 set_idt(phys_to_virt(0), 0);
232 328
233 /* now call it */ 329 /* now call it */
234 relocate_kernel((unsigned long)image->head, (unsigned long)page_list, 330 image->start = relocate_kernel((unsigned long)image->head,
235 image->start); 331 (unsigned long)page_list,
332 image->start,
333 image->preserve_context);
334
335#ifdef CONFIG_KEXEC_JUMP
336 if (kexec_image->preserve_context)
337 restore_processor_state();
338#endif
339
340 __ftrace_enabled_restore(save_ftrace_enabled);
236} 341}
237 342
238void arch_crash_save_vmcoreinfo(void) 343void arch_crash_save_vmcoreinfo(void)