author	Vivek Goyal <vgoyal@redhat.com>	2014-08-08 17:26:09 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-08-08 18:57:33 -0400
commit	dd5f726076cc7639d9713b334c8c133f77c6757a (patch)
tree	6b3e88bdf3e764d97eb88464e31abc097dab44f6
parent	27f48d3e633be23656a097baa3be336e04a82d84 (diff)
kexec: support for kexec on panic using new system call
This patch adds support for loading a kexec on panic (kdump) kernel using the new system call. It prepares ELF headers for the memory areas to be dumped and for the saved CPU registers. It also prepares the memory map for the second kernel and limits its boot to the reserved areas only.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
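For illustration, here is a minimal user-space sketch of loading a crash kernel through the system call path this patch enables. This example is not part of the patch: the kernel image path and command line are placeholders, glibc provides no wrapper for kexec_file_load(2) so raw syscall(2) is used, and the KEXEC_FILE_* flags are assumed to come from <linux/kexec.h> as added by this series.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/kexec.h>

int main(void)
{
	/* Placeholder kernel image and command line for the kdump kernel */
	const char *cmdline = "console=ttyS0 irqpoll nr_cpus=1";
	int kernel_fd = open("/boot/vmlinuz", O_RDONLY);
	long ret;

	if (kernel_fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * KEXEC_FILE_ON_CRASH loads the image into the crashkernel=
	 * reservation; an initrd_fd of -1 plus KEXEC_FILE_NO_INITRAMFS
	 * skips the initramfs. cmdline_len must include the trailing NUL.
	 */
	ret = syscall(__NR_kexec_file_load, kernel_fd, -1,
		      strlen(cmdline) + 1, cmdline,
		      KEXEC_FILE_ON_CRASH | KEXEC_FILE_NO_INITRAMFS);
	if (ret)
		perror("kexec_file_load");

	close(kernel_fd);
	return ret ? 1 : 0;
}

On panic, the kernel jumps into purgatory, which copies the backup region and boots the loaded kdump kernel with the elfcorehdr= argument that setup_cmdline() appends below.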
-rw-r--r--	arch/x86/include/asm/crash.h	9
-rw-r--r--	arch/x86/include/asm/kexec.h	30
-rw-r--r--	arch/x86/kernel/crash.c	563
-rw-r--r--	arch/x86/kernel/kexec-bzimage64.c	55
-rw-r--r--	arch/x86/kernel/machine_kexec_64.c	40
-rw-r--r--	arch/x86/purgatory/entry64.S	6
-rw-r--r--	kernel/kexec.c	46
7 files changed, 724 insertions(+), 25 deletions(-)
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
new file mode 100644
index 000000000000..f498411f2500
--- /dev/null
+++ b/arch/x86/include/asm/crash.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_X86_CRASH_H
+#define _ASM_X86_CRASH_H
+
+int crash_load_segments(struct kimage *image);
+int crash_copy_backup_region(struct kimage *image);
+int crash_setup_memmap_entries(struct kimage *image,
+		struct boot_params *params);
+
+#endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 0dfccced4edf..d2434c1cad05 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -25,6 +25,8 @@
 #include <asm/ptrace.h>
 #include <asm/bootparam.h>
 
+struct kimage;
+
 /*
  * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
  * I.e. Maximum page that is mapped directly into kernel memory,
@@ -62,6 +64,10 @@
 # define KEXEC_ARCH KEXEC_ARCH_X86_64
 #endif
 
+/* Memory to backup during crash kdump */
+#define KEXEC_BACKUP_SRC_START	(0UL)
+#define KEXEC_BACKUP_SRC_END	(640 * 1024UL)	/* 640K */
+
 /*
  * CPU does not save ss and sp on stack if execution is already
  * running in kernel mode at the time of NMI occurrence. This code
@@ -161,17 +167,35 @@ struct kimage_arch {
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+	/* Details of backup region */
+	unsigned long backup_src_start;
+	unsigned long backup_src_sz;
+
+	/* Physical address of backup segment */
+	unsigned long backup_load_addr;
+
+	/* Core ELF header buffer */
+	void *elf_headers;
+	unsigned long elf_headers_sz;
+	unsigned long elf_load_addr;
 };
+#endif /* CONFIG_X86_32 */
 
+#ifdef CONFIG_X86_64
+/*
+ * Number of elements and order of elements in this structure should match
+ * with the ones in arch/x86/purgatory/entry64.S. If you make a change here
+ * make an appropriate change in purgatory too.
+ */
 struct kexec_entry64_regs {
 	uint64_t rax;
-	uint64_t rbx;
 	uint64_t rcx;
 	uint64_t rdx;
-	uint64_t rsi;
-	uint64_t rdi;
+	uint64_t rbx;
 	uint64_t rsp;
 	uint64_t rbp;
+	uint64_t rsi;
+	uint64_t rdi;
 	uint64_t r8;
 	uint64_t r9;
 	uint64_t r10;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 507de8066594..0553a34fa0df 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -4,9 +4,14 @@
  * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
  *
  * Copyright (C) IBM Corporation, 2004. All rights reserved.
+ * Copyright (C) Red Hat Inc., 2014. All rights reserved.
+ * Authors:
+ *	Vivek Goyal <vgoyal@redhat.com>
  *
  */
 
+#define pr_fmt(fmt)	"kexec: " fmt
+
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/smp.h>
@@ -16,6 +21,7 @@
 #include <linux/elf.h>
 #include <linux/elfcore.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 
 #include <asm/processor.h>
 #include <asm/hardirq.h>
@@ -28,6 +34,45 @@
 #include <asm/reboot.h>
 #include <asm/virtext.h>
 
+/* Alignment required for elf header segment */
+#define ELF_CORE_HEADER_ALIGN	4096
+
+/* This primarily represents number of split ranges due to exclusion */
+#define CRASH_MAX_RANGES	16
+
+struct crash_mem_range {
+	u64 start, end;
+};
+
+struct crash_mem {
+	unsigned int nr_ranges;
+	struct crash_mem_range ranges[CRASH_MAX_RANGES];
+};
+
+/* Misc data about ram ranges needed to prepare elf headers */
+struct crash_elf_data {
+	struct kimage *image;
+	/*
+	 * Total number of ram ranges we have after various adjustments for
+	 * GART, crash reserved region etc.
+	 */
+	unsigned int max_nr_ranges;
+	unsigned long gart_start, gart_end;
+
+	/* Pointer to elf header */
+	void *ehdr;
+	/* Pointer to next phdr */
+	void *bufp;
+	struct crash_mem mem;
+};
+
+/* Used while preparing memory map entries for second kernel */
+struct crash_memmap_data {
+	struct boot_params *params;
+	/* Type of memory */
+	unsigned int type;
+};
+
 int in_crash_kexec;
 
 /*
@@ -39,6 +84,7 @@ int in_crash_kexec;
  */
 crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
 EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+unsigned long crash_zero_bytes;
 
 static inline void cpu_crash_vmclear_loaded_vmcss(void)
 {
@@ -135,3 +181,520 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 #endif
 	crash_save_cpu(regs, safe_smp_processor_id());
 }
+
+#ifdef CONFIG_X86_64
+
+static int get_nr_ram_ranges_callback(unsigned long start_pfn,
+				unsigned long nr_pfn, void *arg)
+{
+	int *nr_ranges = arg;
+
+	(*nr_ranges)++;
+	return 0;
+}
+
+static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
+{
+	struct crash_elf_data *ced = arg;
+
+	ced->gart_start = start;
+	ced->gart_end = end;
+
+	/* Not expecting more than 1 gart aperture */
+	return 1;
+}
+
+
+/* Gather all the required information to prepare elf headers for ram regions */
+static void fill_up_crash_elf_data(struct crash_elf_data *ced,
+				   struct kimage *image)
+{
+	unsigned int nr_ranges = 0;
+
+	ced->image = image;
+
+	walk_system_ram_range(0, -1, &nr_ranges,
+				get_nr_ram_ranges_callback);
+
+	ced->max_nr_ranges = nr_ranges;
+
+	/*
+	 * We don't create ELF headers for GART aperture as an attempt
+	 * to dump this memory in second kernel leads to hang/crash.
+	 * If gart aperture is present, one needs to exclude that region
+	 * and that could lead to need of extra phdr.
+	 */
+	walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
+				ced, get_gart_ranges_callback);
+
+	/*
+	 * If we have gart region, excluding that could potentially split
+	 * a memory range, resulting in extra header. Account for that.
+	 */
+	if (ced->gart_end)
+		ced->max_nr_ranges++;
+
+	/* Exclusion of crash region could split memory ranges */
+	ced->max_nr_ranges++;
+
+	/* If crashk_low_res is not 0, another range split possible */
+	if (crashk_low_res.end != 0)
+		ced->max_nr_ranges++;
+}
+
+static int exclude_mem_range(struct crash_mem *mem,
+		unsigned long long mstart, unsigned long long mend)
+{
+	int i, j;
+	unsigned long long start, end;
+	struct crash_mem_range temp_range = {0, 0};
+
+	for (i = 0; i < mem->nr_ranges; i++) {
+		start = mem->ranges[i].start;
+		end = mem->ranges[i].end;
+
+		if (mstart > end || mend < start)
+			continue;
+
+		/* Truncate any area outside of range */
+		if (mstart < start)
+			mstart = start;
+		if (mend > end)
+			mend = end;
+
+		/* Found completely overlapping range */
+		if (mstart == start && mend == end) {
+			mem->ranges[i].start = 0;
+			mem->ranges[i].end = 0;
+			if (i < mem->nr_ranges - 1) {
+				/* Shift rest of the ranges to left */
+				for (j = i; j < mem->nr_ranges - 1; j++) {
+					mem->ranges[j].start =
+						mem->ranges[j+1].start;
+					mem->ranges[j].end =
+						mem->ranges[j+1].end;
+				}
+			}
+			mem->nr_ranges--;
+			return 0;
+		}
+
+		if (mstart > start && mend < end) {
+			/* Split original range */
+			mem->ranges[i].end = mstart - 1;
+			temp_range.start = mend + 1;
+			temp_range.end = end;
+		} else if (mstart != start)
+			mem->ranges[i].end = mstart - 1;
+		else
+			mem->ranges[i].start = mend + 1;
+		break;
+	}
+
+	/* If a split happened, add the split to the array */
+	if (!temp_range.end)
+		return 0;
+
+	/* Split happened */
+	if (i == CRASH_MAX_RANGES - 1) {
+		pr_err("Too many crash ranges after split\n");
+		return -ENOMEM;
+	}
+
+	/* Location where new range should go */
+	j = i + 1;
+	if (j < mem->nr_ranges) {
+		/* Move over all ranges one slot towards the end */
+		for (i = mem->nr_ranges - 1; i >= j; i--)
+			mem->ranges[i + 1] = mem->ranges[i];
+	}
+
+	mem->ranges[j].start = temp_range.start;
+	mem->ranges[j].end = temp_range.end;
+	mem->nr_ranges++;
+	return 0;
+}
+
+/*
+ * Look for any unwanted ranges between mstart, mend and remove them. This
+ * might lead to split and split ranges are put in ced->mem.ranges[] array
+ */
+static int elf_header_exclude_ranges(struct crash_elf_data *ced,
+		unsigned long long mstart, unsigned long long mend)
+{
+	struct crash_mem *cmem = &ced->mem;
+	int ret = 0;
+
+	memset(cmem->ranges, 0, sizeof(cmem->ranges));
+
+	cmem->ranges[0].start = mstart;
+	cmem->ranges[0].end = mend;
+	cmem->nr_ranges = 1;
+
+	/* Exclude crashkernel region */
+	ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+	if (ret)
+		return ret;
+
+	ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+	if (ret)
+		return ret;
+
+	/* Exclude GART region */
+	if (ced->gart_end) {
+		ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
+{
+	struct crash_elf_data *ced = arg;
+	Elf64_Ehdr *ehdr;
+	Elf64_Phdr *phdr;
+	unsigned long mstart, mend;
+	struct kimage *image = ced->image;
+	struct crash_mem *cmem;
+	int ret, i;
+
+	ehdr = ced->ehdr;
+
+	/* Exclude unwanted mem ranges */
+	ret = elf_header_exclude_ranges(ced, start, end);
+	if (ret)
+		return ret;
+
+	/* Go through all the ranges in ced->mem.ranges[] and prepare phdr */
+	cmem = &ced->mem;
+
+	for (i = 0; i < cmem->nr_ranges; i++) {
+		mstart = cmem->ranges[i].start;
+		mend = cmem->ranges[i].end;
+
+		phdr = ced->bufp;
+		ced->bufp += sizeof(Elf64_Phdr);
+
+		phdr->p_type = PT_LOAD;
+		phdr->p_flags = PF_R|PF_W|PF_X;
+		phdr->p_offset = mstart;
+
+		/*
+		 * If a range matches backup region, adjust offset to backup
+		 * segment.
+		 */
+		if (mstart == image->arch.backup_src_start &&
+		    (mend - mstart + 1) == image->arch.backup_src_sz)
+			phdr->p_offset = image->arch.backup_load_addr;
+
+		phdr->p_paddr = mstart;
+		phdr->p_vaddr = (unsigned long long) __va(mstart);
+		phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
+		phdr->p_align = 0;
+		ehdr->e_phnum++;
+		pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
+			phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
+			ehdr->e_phnum, phdr->p_offset);
+	}
+
+	return ret;
+}
+
+static int prepare_elf64_headers(struct crash_elf_data *ced,
+					void **addr, unsigned long *sz)
+{
+	Elf64_Ehdr *ehdr;
+	Elf64_Phdr *phdr;
+	unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
+	unsigned char *buf, *bufp;
+	unsigned int cpu;
+	unsigned long long notes_addr;
+	int ret;
+
+	/* extra phdr for vmcoreinfo elf note */
+	nr_phdr = nr_cpus + 1;
+	nr_phdr += ced->max_nr_ranges;
+
+	/*
+	 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
+	 * area on x86_64 (ffffffff80000000 - ffffffffa0000000).
+	 * I think this is required by tools like gdb. So same physical
+	 * memory will be mapped in two elf headers. One will contain kernel
+	 * text virtual addresses and other will have __va(physical) addresses.
+	 */
+
+	nr_phdr++;
+	elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
+	elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
+
+	buf = vzalloc(elf_sz);
+	if (!buf)
+		return -ENOMEM;
+
+	bufp = buf;
+	ehdr = (Elf64_Ehdr *)bufp;
+	bufp += sizeof(Elf64_Ehdr);
+	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
+	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+	ehdr->e_ident[EI_OSABI] = ELF_OSABI;
+	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+	ehdr->e_type = ET_CORE;
+	ehdr->e_machine = ELF_ARCH;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_phoff = sizeof(Elf64_Ehdr);
+	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+	ehdr->e_phentsize = sizeof(Elf64_Phdr);
+
+	/* Prepare one phdr of type PT_NOTE for each present cpu */
+	for_each_present_cpu(cpu) {
+		phdr = (Elf64_Phdr *)bufp;
+		bufp += sizeof(Elf64_Phdr);
+		phdr->p_type = PT_NOTE;
+		notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
+		phdr->p_offset = phdr->p_paddr = notes_addr;
+		phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
+		(ehdr->e_phnum)++;
+	}
+
+	/* Prepare one PT_NOTE header for vmcoreinfo */
+	phdr = (Elf64_Phdr *)bufp;
+	bufp += sizeof(Elf64_Phdr);
+	phdr->p_type = PT_NOTE;
+	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
+	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+	(ehdr->e_phnum)++;
+
+#ifdef CONFIG_X86_64
+	/* Prepare PT_LOAD type program header for kernel text region */
+	phdr = (Elf64_Phdr *)bufp;
+	bufp += sizeof(Elf64_Phdr);
+	phdr->p_type = PT_LOAD;
+	phdr->p_flags = PF_R|PF_W|PF_X;
+	phdr->p_vaddr = (Elf64_Addr)_text;
+	phdr->p_filesz = phdr->p_memsz = _end - _text;
+	phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
+	(ehdr->e_phnum)++;
+#endif
+
+	/* Prepare PT_LOAD headers for system ram chunks. */
+	ced->ehdr = ehdr;
+	ced->bufp = bufp;
+	ret = walk_system_ram_res(0, -1, ced,
+			prepare_elf64_ram_headers_callback);
+	if (ret < 0)
+		return ret;
+
+	*addr = buf;
+	*sz = elf_sz;
+	return 0;
+}
+
+/* Prepare elf headers. Return addr and size */
+static int prepare_elf_headers(struct kimage *image, void **addr,
+					unsigned long *sz)
+{
+	struct crash_elf_data *ced;
+	int ret;
+
+	ced = kzalloc(sizeof(*ced), GFP_KERNEL);
+	if (!ced)
+		return -ENOMEM;
+
+	fill_up_crash_elf_data(ced, image);
+
+	/* By default prepare 64bit headers */
+	ret = prepare_elf64_headers(ced, addr, sz);
+	kfree(ced);
+	return ret;
+}
+
+static int add_e820_entry(struct boot_params *params, struct e820entry *entry)
+{
+	unsigned int nr_e820_entries;
+
+	nr_e820_entries = params->e820_entries;
+	if (nr_e820_entries >= E820MAX)
+		return 1;
+
+	memcpy(&params->e820_map[nr_e820_entries], entry,
+	       sizeof(struct e820entry));
+	params->e820_entries++;
+	return 0;
+}
+
+static int memmap_entry_callback(u64 start, u64 end, void *arg)
+{
+	struct crash_memmap_data *cmd = arg;
+	struct boot_params *params = cmd->params;
+	struct e820entry ei;
+
+	ei.addr = start;
+	ei.size = end - start + 1;
+	ei.type = cmd->type;
+	add_e820_entry(params, &ei);
+
+	return 0;
+}
+
+static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
+				 unsigned long long mstart,
+				 unsigned long long mend)
+{
+	unsigned long start, end;
+	int ret = 0;
+
+	cmem->ranges[0].start = mstart;
+	cmem->ranges[0].end = mend;
+	cmem->nr_ranges = 1;
+
+	/* Exclude Backup region */
+	start = image->arch.backup_load_addr;
+	end = start + image->arch.backup_src_sz - 1;
+	ret = exclude_mem_range(cmem, start, end);
+	if (ret)
+		return ret;
+
+	/* Exclude elf header region */
+	start = image->arch.elf_load_addr;
+	end = start + image->arch.elf_headers_sz - 1;
+	return exclude_mem_range(cmem, start, end);
+}
+
+/* Prepare memory map for crash dump kernel */
+int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
+{
+	int i, ret = 0;
+	unsigned long flags;
+	struct e820entry ei;
+	struct crash_memmap_data cmd;
+	struct crash_mem *cmem;
+
+	cmem = vzalloc(sizeof(struct crash_mem));
+	if (!cmem)
+		return -ENOMEM;
+
+	memset(&cmd, 0, sizeof(struct crash_memmap_data));
+	cmd.params = params;
+
+	/* Add first 640K segment */
+	ei.addr = image->arch.backup_src_start;
+	ei.size = image->arch.backup_src_sz;
+	ei.type = E820_RAM;
+	add_e820_entry(params, &ei);
+
+	/* Add ACPI tables */
+	cmd.type = E820_ACPI;
+	flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
+		       memmap_entry_callback);
+
+	/* Add ACPI Non-volatile Storage */
+	cmd.type = E820_NVS;
+	walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
+		       memmap_entry_callback);
+
+	/* Add crashk_low_res region */
+	if (crashk_low_res.end) {
+		ei.addr = crashk_low_res.start;
+		ei.size = crashk_low_res.end - crashk_low_res.start + 1;
+		ei.type = E820_RAM;
+		add_e820_entry(params, &ei);
+	}
+
+	/* Exclude some ranges from crashk_res and add rest to memmap */
+	ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
+				    crashk_res.end);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < cmem->nr_ranges; i++) {
+		ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1;
+
+		/* If entry is less than a page, skip it */
+		if (ei.size < PAGE_SIZE)
+			continue;
+		ei.addr = cmem->ranges[i].start;
+		ei.type = E820_RAM;
+		add_e820_entry(params, &ei);
+	}
+
+out:
+	vfree(cmem);
+	return ret;
+}
+
+static int determine_backup_region(u64 start, u64 end, void *arg)
+{
+	struct kimage *image = arg;
+
+	image->arch.backup_src_start = start;
+	image->arch.backup_src_sz = end - start + 1;
+
+	/* Expecting only one range for backup region */
+	return 1;
+}
+
+int crash_load_segments(struct kimage *image)
+{
+	unsigned long src_start, src_sz, elf_sz;
+	void *elf_addr;
+	int ret;
+
+	/*
+	 * Determine and load a segment for backup area. First 640K RAM
+	 * region is backup source
+	 */
+
+	ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
+				image, determine_backup_region);
+
+	/* Zero or positive return values are ok */
+	if (ret < 0)
+		return ret;
+
+	src_start = image->arch.backup_src_start;
+	src_sz = image->arch.backup_src_sz;
+
+	/* Add backup segment. */
+	if (src_sz) {
+		/*
+		 * Ideally there is no source for backup segment. This is
+		 * copied in purgatory after crash. Just add a zero filled
+		 * segment for now to make sure checksum logic works fine.
+		 */
+		ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
+				       sizeof(crash_zero_bytes), src_sz,
+				       PAGE_SIZE, 0, -1, 0,
+				       &image->arch.backup_load_addr);
+		if (ret)
+			return ret;
+		pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
+			 image->arch.backup_load_addr, src_start, src_sz);
+	}
+
+	/* Prepare elf headers and add a segment */
+	ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
+	if (ret)
+		return ret;
+
+	image->arch.elf_headers = elf_addr;
+	image->arch.elf_headers_sz = elf_sz;
+
+	ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
+			       ELF_CORE_HEADER_ALIGN, 0, -1, 0,
+			       &image->arch.elf_load_addr);
+	if (ret) {
+		vfree((void *)image->arch.elf_headers);
+		return ret;
+	}
+	pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+		 image->arch.elf_load_addr, elf_sz, elf_sz);
+
+	return ret;
+}
+
+#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index bcedd100192f..a8e646458a10 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -21,6 +21,9 @@
 
 #include <asm/bootparam.h>
 #include <asm/setup.h>
+#include <asm/crash.h>
+
+#define MAX_ELFCOREHDR_STR_LEN	30	/* elfcorehdr=0x<64bit-value> */
 
 /*
  * Defines lowest physical address for various segments. Not sure where
@@ -58,18 +61,24 @@ static int setup_initrd(struct boot_params *params,
 	return 0;
 }
 
-static int setup_cmdline(struct boot_params *params,
+static int setup_cmdline(struct kimage *image, struct boot_params *params,
 			 unsigned long bootparams_load_addr,
 			 unsigned long cmdline_offset, char *cmdline,
 			 unsigned long cmdline_len)
 {
 	char *cmdline_ptr = ((char *)params) + cmdline_offset;
-	unsigned long cmdline_ptr_phys;
+	unsigned long cmdline_ptr_phys, len;
 	uint32_t cmdline_low_32, cmdline_ext_32;
 
 	memcpy(cmdline_ptr, cmdline, cmdline_len);
+	if (image->type == KEXEC_TYPE_CRASH) {
+		len = sprintf(cmdline_ptr + cmdline_len - 1,
+			" elfcorehdr=0x%lx", image->arch.elf_load_addr);
+		cmdline_len += len;
+	}
 	cmdline_ptr[cmdline_len - 1] = '\0';
 
+	pr_debug("Final command line is: %s\n", cmdline_ptr);
 	cmdline_ptr_phys = bootparams_load_addr + cmdline_offset;
 	cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL;
 	cmdline_ext_32 = cmdline_ptr_phys >> 32;
@@ -98,11 +107,12 @@ static int setup_memory_map_entries(struct boot_params *params)
 	return 0;
 }
 
-static int setup_boot_parameters(struct boot_params *params)
+static int setup_boot_parameters(struct kimage *image,
+				 struct boot_params *params)
 {
 	unsigned int nr_e820_entries;
 	unsigned long long mem_k, start, end;
-	int i;
+	int i, ret = 0;
 
 	/* Get subarch from existing bootparams */
 	params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
@@ -125,7 +135,13 @@ static int setup_boot_parameters(struct boot_params *params)
 	/* Default sysdesc table */
 	params->sys_desc_table.length = 0;
 
-	setup_memory_map_entries(params);
+	if (image->type == KEXEC_TYPE_CRASH) {
+		ret = crash_setup_memmap_entries(image, params);
+		if (ret)
+			return ret;
+	} else
+		setup_memory_map_entries(params);
+
 	nr_e820_entries = params->e820_entries;
 
 	for (i = 0; i < nr_e820_entries; i++) {
@@ -153,7 +169,7 @@ static int setup_boot_parameters(struct boot_params *params)
 	memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer,
 	       EDD_MBR_SIG_MAX * sizeof(unsigned int));
 
-	return 0;
+	return ret;
 }
 
 int bzImage64_probe(const char *buf, unsigned long len)
@@ -241,6 +257,22 @@ void *bzImage64_load(struct kimage *image, char *kernel,
 	}
 
 	/*
+	 * In case of crash dump, we will append elfcorehdr=<addr> to
+	 * command line. Make sure it does not overflow
+	 */
+	if (cmdline_len + MAX_ELFCOREHDR_STR_LEN > header->cmdline_size) {
+		pr_debug("Appending elfcorehdr=<addr> to command line exceeds maximum allowed length\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Allocate and load backup region */
+	if (image->type == KEXEC_TYPE_CRASH) {
+		ret = crash_load_segments(image);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	/*
 	 * Load purgatory. For 64bit entry point, purgatory code can be
 	 * anywhere.
 	 */
@@ -254,7 +286,8 @@ void *bzImage64_load(struct kimage *image, char *kernel,
 	pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
 
 	/* Load Bootparams and cmdline */
-	params_cmdline_sz = sizeof(struct boot_params) + cmdline_len;
+	params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
+				MAX_ELFCOREHDR_STR_LEN;
 	params = kzalloc(params_cmdline_sz, GFP_KERNEL);
 	if (!params)
 		return ERR_PTR(-ENOMEM);
@@ -303,8 +336,8 @@ void *bzImage64_load(struct kimage *image, char *kernel,
 		setup_initrd(params, initrd_load_addr, initrd_len);
 	}
 
-	setup_cmdline(params, bootparam_load_addr, sizeof(struct boot_params),
-		      cmdline, cmdline_len);
+	setup_cmdline(image, params, bootparam_load_addr,
+		      sizeof(struct boot_params), cmdline, cmdline_len);
 
 	/* bootloader info. Do we need a separate ID for kexec kernel loader? */
 	params->hdr.type_of_loader = 0x0D << 4;
@@ -332,7 +365,9 @@ void *bzImage64_load(struct kimage *image, char *kernel,
 	if (ret)
 		goto out_free_params;
 
-	setup_boot_parameters(params);
+	ret = setup_boot_parameters(image, params);
+	if (ret)
+		goto out_free_params;
 
 	/* Allocate loader specific data */
 	ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL);
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 18d0f9e0b6da..9330434da777 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -178,6 +178,38 @@ static void load_segments(void)
 	);
 }
 
+/* Update purgatory as needed after various image segments have been prepared */
+static int arch_update_purgatory(struct kimage *image)
+{
+	int ret = 0;
+
+	if (!image->file_mode)
+		return 0;
+
+	/* Setup copying of backup region */
+	if (image->type == KEXEC_TYPE_CRASH) {
+		ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+				&image->arch.backup_load_addr,
+				sizeof(image->arch.backup_load_addr), 0);
+		if (ret)
+			return ret;
+
+		ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+				&image->arch.backup_src_start,
+				sizeof(image->arch.backup_src_start), 0);
+		if (ret)
+			return ret;
+
+		ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+				&image->arch.backup_src_sz,
+				sizeof(image->arch.backup_src_sz), 0);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
 int machine_kexec_prepare(struct kimage *image)
 {
 	unsigned long start_pgtable;
@@ -191,6 +223,11 @@ int machine_kexec_prepare(struct kimage *image)
 	if (result)
 		return result;
 
+	/* update purgatory as needed */
+	result = arch_update_purgatory(image);
+	if (result)
+		return result;
+
 	return 0;
 }
 
@@ -315,6 +352,9 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
 
 void *arch_kexec_kernel_image_load(struct kimage *image)
 {
+	vfree(image->arch.elf_headers);
+	image->arch.elf_headers = NULL;
+
 	if (!image->fops || !image->fops->load)
 		return ERR_PTR(-ENOEXEC);
 
diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S
index be3249d7ed2d..d1a4291d3568 100644
--- a/arch/x86/purgatory/entry64.S
+++ b/arch/x86/purgatory/entry64.S
@@ -61,13 +61,13 @@ new_cs_exit:
 	.balign 4
 entry64_regs:
 rax:	.quad 0x0
-rbx:	.quad 0x0
 rcx:	.quad 0x0
 rdx:	.quad 0x0
-rsi:	.quad 0x0
-rdi:	.quad 0x0
+rbx:	.quad 0x0
 rsp:	.quad 0x0
 rbp:	.quad 0x0
+rsi:	.quad 0x0
+rdi:	.quad 0x0
 r8:	.quad 0x0
 r9:	.quad 0x0
 r10:	.quad 0x0
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 0926f2a3ed03..f18c780f9716 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -548,6 +548,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
 {
 	int ret;
 	struct kimage *image;
+	bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
 
 	image = do_kimage_alloc_init();
 	if (!image)
@@ -555,6 +556,12 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
 
 	image->file_mode = 1;
 
+	if (kexec_on_panic) {
+		/* Enable special crash kernel control page alloc policy. */
+		image->control_page = crashk_res.start;
+		image->type = KEXEC_TYPE_CRASH;
+	}
+
 	ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
 					   cmdline_ptr, cmdline_len, flags);
 	if (ret)
@@ -572,10 +579,12 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
 		goto out_free_post_load_bufs;
 	}
 
-	image->swap_page = kimage_alloc_control_pages(image, 0);
-	if (!image->swap_page) {
-		pr_err(KERN_ERR "Could not allocate swap buffer\n");
-		goto out_free_control_pages;
+	if (!kexec_on_panic) {
+		image->swap_page = kimage_alloc_control_pages(image, 0);
+		if (!image->swap_page) {
+			pr_err(KERN_ERR "Could not allocate swap buffer\n");
+			goto out_free_control_pages;
+		}
 	}
 
 	*rimage = image;
@@ -1113,10 +1122,14 @@ static int kimage_load_crash_segment(struct kimage *image,
 	unsigned long maddr;
 	size_t ubytes, mbytes;
 	int result;
-	unsigned char __user *buf;
+	unsigned char __user *buf = NULL;
+	unsigned char *kbuf = NULL;
 
 	result = 0;
-	buf = segment->buf;
+	if (image->file_mode)
+		kbuf = segment->kbuf;
+	else
+		buf = segment->buf;
 	ubytes = segment->bufsz;
 	mbytes = segment->memsz;
 	maddr = segment->mem;
@@ -1139,7 +1152,12 @@ static int kimage_load_crash_segment(struct kimage *image,
 			/* Zero the trailing part of the page */
 			memset(ptr + uchunk, 0, mchunk - uchunk);
 		}
-		result = copy_from_user(ptr, buf, uchunk);
+
+		/* For file based kexec, source pages are in kernel memory */
+		if (image->file_mode)
+			memcpy(ptr, kbuf, uchunk);
+		else
+			result = copy_from_user(ptr, buf, uchunk);
 		kexec_flush_icache_page(page);
 		kunmap(page);
 		if (result) {
@@ -1148,7 +1166,10 @@ static int kimage_load_crash_segment(struct kimage *image,
 		}
 		ubytes -= uchunk;
 		maddr += mchunk;
-		buf += mchunk;
+		if (image->file_mode)
+			kbuf += mchunk;
+		else
+			buf += mchunk;
 		mbytes -= mchunk;
 	}
 out:
@@ -2127,7 +2148,14 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
 	kbuf->top_down = top_down;
 
 	/* Walk the RAM ranges and allocate a suitable range for the buffer */
-	ret = walk_system_ram_res(0, -1, kbuf, locate_mem_hole_callback);
+	if (image->type == KEXEC_TYPE_CRASH)
+		ret = walk_iomem_res("Crash kernel",
+				     IORESOURCE_MEM | IORESOURCE_BUSY,
+				     crashk_res.start, crashk_res.end, kbuf,
+				     locate_mem_hole_callback);
+	else
+		ret = walk_system_ram_res(0, -1, kbuf,
+					  locate_mem_hole_callback);
 	if (ret != 1) {
 		/* A suitable memory range could not be found for buffer */
 		return -EADDRNOTAVAIL;