diff options
Diffstat (limited to 'arch/x86/kernel/machine_kexec_64.c')
| -rw-r--r-- | arch/x86/kernel/machine_kexec_64.c | 179 |
1 files changed, 142 insertions, 37 deletions
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index c43caa3a91f..89cea4d4467 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
| @@ -12,20 +12,47 @@ | |||
| 12 | #include <linux/reboot.h> | 12 | #include <linux/reboot.h> |
| 13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
| 14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
| 15 | #include <linux/io.h> | ||
| 16 | #include <linux/suspend.h> | ||
| 15 | 17 | ||
| 16 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
| 17 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
| 18 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
| 19 | #include <asm/io.h> | ||
| 20 | 21 | ||
| 21 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | 22 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, |
| 22 | static u64 kexec_pgd[512] PAGE_ALIGNED; | 23 | unsigned long addr) |
| 23 | static u64 kexec_pud0[512] PAGE_ALIGNED; | 24 | { |
| 24 | static u64 kexec_pmd0[512] PAGE_ALIGNED; | 25 | pud_t *pud; |
| 25 | static u64 kexec_pte0[512] PAGE_ALIGNED; | 26 | pmd_t *pmd; |
| 26 | static u64 kexec_pud1[512] PAGE_ALIGNED; | 27 | struct page *page; |
| 27 | static u64 kexec_pmd1[512] PAGE_ALIGNED; | 28 | int result = -ENOMEM; |
| 28 | static u64 kexec_pte1[512] PAGE_ALIGNED; | 29 | |
| 30 | addr &= PMD_MASK; | ||
| 31 | pgd += pgd_index(addr); | ||
| 32 | if (!pgd_present(*pgd)) { | ||
| 33 | page = kimage_alloc_control_pages(image, 0); | ||
| 34 | if (!page) | ||
| 35 | goto out; | ||
| 36 | pud = (pud_t *)page_address(page); | ||
| 37 | memset(pud, 0, PAGE_SIZE); | ||
| 38 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
| 39 | } | ||
| 40 | pud = pud_offset(pgd, addr); | ||
| 41 | if (!pud_present(*pud)) { | ||
| 42 | page = kimage_alloc_control_pages(image, 0); | ||
| 43 | if (!page) | ||
| 44 | goto out; | ||
| 45 | pmd = (pmd_t *)page_address(page); | ||
| 46 | memset(pmd, 0, PAGE_SIZE); | ||
| 47 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
| 48 | } | ||
| 49 | pmd = pmd_offset(pud, addr); | ||
| 50 | if (!pmd_present(*pmd)) | ||
| 51 | set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); | ||
| 52 | result = 0; | ||
| 53 | out: | ||
| 54 | return result; | ||
| 55 | } | ||
| 29 | 56 | ||
| 30 | static void init_level2_page(pmd_t *level2p, unsigned long addr) | 57 | static void init_level2_page(pmd_t *level2p, unsigned long addr) |
| 31 | { | 58 | { |
| @@ -92,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p, | |||
| 92 | } | 119 | } |
| 93 | level3p = (pud_t *)page_address(page); | 120 | level3p = (pud_t *)page_address(page); |
| 94 | result = init_level3_page(image, level3p, addr, last_addr); | 121 | result = init_level3_page(image, level3p, addr, last_addr); |
| 95 | if (result) { | 122 | if (result) |
| 96 | goto out; | 123 | goto out; |
| 97 | } | ||
| 98 | set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); | 124 | set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); |
| 99 | addr += PGDIR_SIZE; | 125 | addr += PGDIR_SIZE; |
| 100 | } | 126 | } |
| @@ -107,12 +133,72 @@ out: | |||
| 107 | return result; | 133 | return result; |
| 108 | } | 134 | } |
| 109 | 135 | ||
| 136 | static void free_transition_pgtable(struct kimage *image) | ||
| 137 | { | ||
| 138 | free_page((unsigned long)image->arch.pud); | ||
| 139 | free_page((unsigned long)image->arch.pmd); | ||
| 140 | free_page((unsigned long)image->arch.pte); | ||
| 141 | } | ||
| 142 | |||
| 143 | static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) | ||
| 144 | { | ||
| 145 | pud_t *pud; | ||
| 146 | pmd_t *pmd; | ||
| 147 | pte_t *pte; | ||
| 148 | unsigned long vaddr, paddr; | ||
| 149 | int result = -ENOMEM; | ||
| 150 | |||
| 151 | vaddr = (unsigned long)relocate_kernel; | ||
| 152 | paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); | ||
| 153 | pgd += pgd_index(vaddr); | ||
| 154 | if (!pgd_present(*pgd)) { | ||
| 155 | pud = (pud_t *)get_zeroed_page(GFP_KERNEL); | ||
| 156 | if (!pud) | ||
| 157 | goto err; | ||
| 158 | image->arch.pud = pud; | ||
| 159 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
| 160 | } | ||
| 161 | pud = pud_offset(pgd, vaddr); | ||
| 162 | if (!pud_present(*pud)) { | ||
| 163 | pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); | ||
| 164 | if (!pmd) | ||
| 165 | goto err; | ||
| 166 | image->arch.pmd = pmd; | ||
| 167 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
| 168 | } | ||
| 169 | pmd = pmd_offset(pud, vaddr); | ||
| 170 | if (!pmd_present(*pmd)) { | ||
| 171 | pte = (pte_t *)get_zeroed_page(GFP_KERNEL); | ||
| 172 | if (!pte) | ||
| 173 | goto err; | ||
| 174 | image->arch.pte = pte; | ||
| 175 | set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); | ||
| 176 | } | ||
| 177 | pte = pte_offset_kernel(pmd, vaddr); | ||
| 178 | set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); | ||
| 179 | return 0; | ||
| 180 | err: | ||
| 181 | free_transition_pgtable(image); | ||
| 182 | return result; | ||
| 183 | } | ||
| 184 | |||
| 110 | 185 | ||
| 111 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) | 186 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) |
| 112 | { | 187 | { |
| 113 | pgd_t *level4p; | 188 | pgd_t *level4p; |
| 189 | int result; | ||
| 114 | level4p = (pgd_t *)__va(start_pgtable); | 190 | level4p = (pgd_t *)__va(start_pgtable); |
| 115 | return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); | 191 | result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); |
| 192 | if (result) | ||
| 193 | return result; | ||
| 194 | /* | ||
| 195 | * image->start may be outside 0 ~ max_pfn, for example when | ||
| 196 | * jumping back to the original kernel from a kexeced kernel | ||
| 197 | */ | ||
| 198 | result = init_one_level2_page(image, level4p, image->start); | ||
| 199 | if (result) | ||
| 200 | return result; | ||
| 201 | return init_transition_pgtable(image, level4p); | ||
| 116 | } | 202 | } |
| 117 | 203 | ||
| 118 | static void set_idt(void *newidt, u16 limit) | 204 | static void set_idt(void *newidt, u16 limit) |
| @@ -174,7 +260,7 @@ int machine_kexec_prepare(struct kimage *image) | |||
| 174 | 260 | ||
| 175 | void machine_kexec_cleanup(struct kimage *image) | 261 | void machine_kexec_cleanup(struct kimage *image) |
| 176 | { | 262 | { |
| 177 | return; | 263 | free_transition_pgtable(image); |
| 178 | } | 264 | } |
| 179 | 265 | ||
| 180 | /* | 266 | /* |
| @@ -185,36 +271,45 @@ void machine_kexec(struct kimage *image) | |||
| 185 | { | 271 | { |
| 186 | unsigned long page_list[PAGES_NR]; | 272 | unsigned long page_list[PAGES_NR]; |
| 187 | void *control_page; | 273 | void *control_page; |
| 274 | int save_ftrace_enabled; | ||
| 188 | 275 | ||
| 189 | tracer_disable(); | 276 | #ifdef CONFIG_KEXEC_JUMP |
| 277 | if (kexec_image->preserve_context) | ||
| 278 | save_processor_state(); | ||
| 279 | #endif | ||
| 280 | |||
| 281 | save_ftrace_enabled = __ftrace_enabled_save(); | ||
| 190 | 282 | ||
| 191 | /* Interrupts aren't acceptable while we reboot */ | 283 | /* Interrupts aren't acceptable while we reboot */ |
| 192 | local_irq_disable(); | 284 | local_irq_disable(); |
| 193 | 285 | ||
| 286 | if (image->preserve_context) { | ||
| 287 | #ifdef CONFIG_X86_IO_APIC | ||
| 288 | /* | ||
| 289 | * We need to put APICs in legacy mode so that we can | ||
| 290 | * get timer interrupts in second kernel. kexec/kdump | ||
| 291 | * paths already have calls to disable_IO_APIC() in | ||
| 292 | * one form or other. The kexec jump path also needs | ||
| 293 | * one. | ||
| 294 | */ | ||
| 295 | disable_IO_APIC(); | ||
| 296 | #endif | ||
| 297 | } | ||
| 298 | |||
| 194 | control_page = page_address(image->control_code_page) + PAGE_SIZE; | 299 | control_page = page_address(image->control_code_page) + PAGE_SIZE; |
| 195 | memcpy(control_page, relocate_kernel, PAGE_SIZE); | 300 | memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); |
| 196 | 301 | ||
| 197 | page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); | 302 | page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); |
| 198 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | 303 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
| 199 | page_list[PA_PGD] = virt_to_phys(&kexec_pgd); | ||
| 200 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | ||
| 201 | page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0); | ||
| 202 | page_list[VA_PUD_0] = (unsigned long)kexec_pud0; | ||
| 203 | page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0); | ||
| 204 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; | ||
| 205 | page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0); | ||
| 206 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | ||
| 207 | page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1); | ||
| 208 | page_list[VA_PUD_1] = (unsigned long)kexec_pud1; | ||
| 209 | page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1); | ||
| 210 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | ||
| 211 | page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1); | ||
| 212 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | ||
| 213 | |||
| 214 | page_list[PA_TABLE_PAGE] = | 304 | page_list[PA_TABLE_PAGE] = |
| 215 | (unsigned long)__pa(page_address(image->control_code_page)); | 305 | (unsigned long)__pa(page_address(image->control_code_page)); |
| 216 | 306 | ||
| 217 | /* The segment registers are funny things, they have both a | 307 | if (image->type == KEXEC_TYPE_DEFAULT) |
| 308 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) | ||
| 309 | << PAGE_SHIFT); | ||
| 310 | |||
| 311 | /* | ||
| 312 | * The segment registers are funny things, they have both a | ||
| 218 | * visible and an invisible part. Whenever the visible part is | 313 | * visible and an invisible part. Whenever the visible part is |
| 219 | * set to a specific selector, the invisible part is loaded | 314 | * set to a specific selector, the invisible part is loaded |
| 220 | * from a table in memory. At no other time is the | 315 | * from a table in memory. At no other time is the |
| @@ -224,15 +319,25 @@ void machine_kexec(struct kimage *image) | |||
| 224 | * segments, before I zap the gdt with an invalid value. | 319 | * segments, before I zap the gdt with an invalid value. |
| 225 | */ | 320 | */ |
| 226 | load_segments(); | 321 | load_segments(); |
| 227 | /* The gdt & idt are now invalid. | 322 | /* |
| 323 | * The gdt & idt are now invalid. | ||
| 228 | * If you want to load them you must set up your own idt & gdt. | 324 | * If you want to load them you must set up your own idt & gdt. |
| 229 | */ | 325 | */ |
| 230 | set_gdt(phys_to_virt(0),0); | 326 | set_gdt(phys_to_virt(0), 0); |
| 231 | set_idt(phys_to_virt(0),0); | 327 | set_idt(phys_to_virt(0), 0); |
| 232 | 328 | ||
| 233 | /* now call it */ | 329 | /* now call it */ |
| 234 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, | 330 | image->start = relocate_kernel((unsigned long)image->head, |
| 235 | image->start); | 331 | (unsigned long)page_list, |
| 332 | image->start, | ||
| 333 | image->preserve_context); | ||
| 334 | |||
| 335 | #ifdef CONFIG_KEXEC_JUMP | ||
| 336 | if (kexec_image->preserve_context) | ||
| 337 | restore_processor_state(); | ||
| 338 | #endif | ||
| 339 | |||
| 340 | __ftrace_enabled_restore(save_ftrace_enabled); | ||
| 236 | } | 341 | } |
| 237 | 342 | ||
| 238 | void arch_crash_save_vmcoreinfo(void) | 343 | void arch_crash_save_vmcoreinfo(void) |
