Diffstat (limited to 'arch/x86/kernel/machine_kexec_64.c')
-rw-r--r--	arch/x86/kernel/machine_kexec_64.c | 179
1 files changed, 142 insertions, 37 deletions

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index c43caa3a91f3..89cea4d44679 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -12,20 +12,47 @@
 #include <linux/reboot.h>
 #include <linux/numa.h>
 #include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/suspend.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
-#include <asm/io.h>
 
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-static u64 kexec_pgd[512] PAGE_ALIGNED;
-static u64 kexec_pud0[512] PAGE_ALIGNED;
-static u64 kexec_pmd0[512] PAGE_ALIGNED;
-static u64 kexec_pte0[512] PAGE_ALIGNED;
-static u64 kexec_pud1[512] PAGE_ALIGNED;
-static u64 kexec_pmd1[512] PAGE_ALIGNED;
-static u64 kexec_pte1[512] PAGE_ALIGNED;
+static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
+				unsigned long addr)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	struct page *page;
+	int result = -ENOMEM;
+
+	addr &= PMD_MASK;
+	pgd += pgd_index(addr);
+	if (!pgd_present(*pgd)) {
+		page = kimage_alloc_control_pages(image, 0);
+		if (!page)
+			goto out;
+		pud = (pud_t *)page_address(page);
+		memset(pud, 0, PAGE_SIZE);
+		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+	}
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud)) {
+		page = kimage_alloc_control_pages(image, 0);
+		if (!page)
+			goto out;
+		pmd = (pmd_t *)page_address(page);
+		memset(pmd, 0, PAGE_SIZE);
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+	}
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+	result = 0;
+out:
+	return result;
+}
 
 static void init_level2_page(pmd_t *level2p, unsigned long addr)
 {
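The new init_one_level2_page() helper identity-maps the single 2 MiB large page that contains addr, allocating intermediate PUD/PMD tables from the image's control pages as needed. As a rough illustration of what the addr &= PMD_MASK step plus a __PAGE_KERNEL_LARGE_EXEC PMD entry amount to, here is a stand-alone user-space sketch (not part of the patch; the constants mirror x86_64's 2 MiB PMD granularity and the sample address is made up):

/* Stand-alone sketch: which 2 MiB region does one PMD-level entry cover? */
#include <stdint.h>
#include <stdio.h>

#define PMD_SHIFT 21			/* one PMD entry maps 2 MiB on x86_64 */
#define PMD_SIZE  (1ULL << PMD_SHIFT)
#define PMD_MASK  (~(PMD_SIZE - 1))

int main(void)
{
	uint64_t start = 0x100012340ULL;	/* hypothetical image->start */
	uint64_t base  = start & PMD_MASK;	/* what init_one_level2_page() maps */

	printf("entry %#llx lands in identity-mapped region %#llx-%#llx\n",
	       (unsigned long long)start,
	       (unsigned long long)base,
	       (unsigned long long)(base + PMD_SIZE - 1));
	return 0;
}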
@@ -92,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
 		}
 		level3p = (pud_t *)page_address(page);
 		result = init_level3_page(image, level3p, addr, last_addr);
-		if (result) {
+		if (result)
 			goto out;
-		}
 		set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
 		addr += PGDIR_SIZE;
 	}
@@ -107,12 +133,72 @@ out:
 	return result;
 }
 
+static void free_transition_pgtable(struct kimage *image)
+{
+	free_page((unsigned long)image->arch.pud);
+	free_page((unsigned long)image->arch.pmd);
+	free_page((unsigned long)image->arch.pte);
+}
+
+static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long vaddr, paddr;
+	int result = -ENOMEM;
+
+	vaddr = (unsigned long)relocate_kernel;
+	paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
+	pgd += pgd_index(vaddr);
+	if (!pgd_present(*pgd)) {
+		pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
+		if (!pud)
+			goto err;
+		image->arch.pud = pud;
+		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+	}
+	pud = pud_offset(pgd, vaddr);
+	if (!pud_present(*pud)) {
+		pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+		if (!pmd)
+			goto err;
+		image->arch.pmd = pmd;
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+	}
+	pmd = pmd_offset(pud, vaddr);
+	if (!pmd_present(*pmd)) {
+		pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
+		if (!pte)
+			goto err;
+		image->arch.pte = pte;
+		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
+	}
+	pte = pte_offset_kernel(pmd, vaddr);
+	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+	return 0;
+err:
+	free_transition_pgtable(image);
+	return result;
+}
+
 
 static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 {
 	pgd_t *level4p;
+	int result;
 	level4p = (pgd_t *)__va(start_pgtable);
-	return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
+	result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
+	if (result)
+		return result;
+	/*
+	 * image->start may be outside 0 ~ max_pfn, for example when
+	 * jump back to original kernel from kexeced kernel
+	 */
+	result = init_one_level2_page(image, level4p, image->start);
+	if (result)
+		return result;
+	return init_transition_pgtable(image, level4p);
 }
 
 static void set_idt(void *newidt, u16 limit)
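init_transition_pgtable() stashes the freshly allocated table pages in image->arch so machine_kexec_cleanup() can release them later. That per-image bookkeeping lives in the arch header rather than in this file; presumably the companion change to arch/x86/include/asm/kexec.h adds something along these lines (a sketch, not the exact hunk):

/* Sketch of the x86_64 arch-specific kimage state assumed by
 * init_transition_pgtable()/free_transition_pgtable(); the real
 * definition is in the companion asm/kexec.h change. */
struct kimage_arch {
	pud_t *pud;	/* intermediate levels of the transition mapping,  */
	pmd_t *pmd;	/* allocated with get_zeroed_page() and freed by   */
	pte_t *pte;	/* free_transition_pgtable()                       */
};

Note that free_transition_pgtable() is safe to call from the err: label even when only some levels were allocated: the kimage is zero-initialized, and free_page() treats a zero address as a no-op.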
@@ -174,7 +260,7 @@ int machine_kexec_prepare(struct kimage *image)
 
 void machine_kexec_cleanup(struct kimage *image)
 {
-	return;
+	free_transition_pgtable(image);
 }
 
 /*
@@ -185,36 +271,45 @@ void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
+	int save_ftrace_enabled;
 
-	tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		save_processor_state();
+#endif
+
+	save_ftrace_enabled = __ftrace_enabled_save();
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
+	if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+		/*
+		 * We need to put APICs in legacy mode so that we can
+		 * get timer interrupts in second kernel. kexec/kdump
+		 * paths already have calls to disable_IO_APIC() in
+		 * one form or other. kexec jump path also need
+		 * one.
+		 */
+		disable_IO_APIC();
+#endif
+	}
+
 	control_page = page_address(image->control_code_page) + PAGE_SIZE;
-	memcpy(control_page, relocate_kernel, PAGE_SIZE);
+	memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
 
 	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
-	page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
-	page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
-	page_list[VA_PGD] = (unsigned long)kexec_pgd;
-	page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
-	page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
-	page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
-	page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
-	page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
-	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
-	page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
-	page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
-	page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
-	page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
-	page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
-	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
+	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
 	page_list[PA_TABLE_PAGE] =
 		(unsigned long)__pa(page_address(image->control_code_page));
 
-	/* The segment registers are funny things, they have both a
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+						<< PAGE_SHIFT);
+
+	/*
+	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part. Whenever the visible part is
 	 * set to a specific selector, the invisible part is loaded
 	 * with from a table in memory. At no other time is the
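With the static kexec_pgd/pud/pmd/pte arrays gone, machine_kexec() only needs to hand the relocation stub the control page, the identity-mapping table page and, for a regular (non-crash) kexec, the swap page. The PA_*/VA_* indices and PAGES_NR come from asm/kexec.h; after this series the x86_64 list presumably shrinks to roughly the following (a sketch, the authoritative values are in arch/x86/include/asm/kexec.h):

/* Assumed page_list[] layout for x86_64 after this patch series. */
#define PA_CONTROL_PAGE	0
#define VA_CONTROL_PAGE	1
#define PA_TABLE_PAGE	2
#define PA_SWAP_PAGE	3
#define PAGES_NR	4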
@@ -224,15 +319,25 @@ void machine_kexec(struct kimage *image)
 	 * segments, before I zap the gdt with an invalid value.
 	 */
 	load_segments();
-	/* The gdt & idt are now invalid.
+	/*
+	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	set_gdt(phys_to_virt(0),0);
-	set_idt(phys_to_virt(0),0);
+	set_gdt(phys_to_virt(0), 0);
+	set_idt(phys_to_virt(0), 0);
 
 	/* now call it */
-	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-			image->start);
+	image->start = relocate_kernel((unsigned long)image->head,
+				       (unsigned long)page_list,
+				       image->start,
+				       image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		restore_processor_state();
+#endif
+
+	__ftrace_enabled_restore(save_ftrace_enabled);
 }
 
 void arch_crash_save_vmcoreinfo(void)
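relocate_kernel() now takes a fourth argument and is no longer treated as noreturn: with preserve_context set it can come back to the caller, and its return value is stored into image->start so a later jump re-enters the other kernel where it left off. The declaration in asm/kexec.h presumably changes to something like the following (a sketch; argument names are illustrative):

/* Assumed x86_64 prototype after this change (see asm/kexec.h and
 * relocate_kernel_64.S); previously it was declared noreturn with
 * only the first three arguments. */
unsigned long relocate_kernel(unsigned long indirection_page,
			      unsigned long page_list,
			      unsigned long start_address,
			      unsigned int preserve_context);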