aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMahesh Salgaonkar <mahesh@linux.vnet.ibm.com>2012-02-15 20:14:45 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2012-02-22 18:50:01 -0500
commitebaeb5ae24379b5b635dc1d1fa6df904bc95b4d9 (patch)
tree4c1d54e9ec25f48dd05708ed62d5eb6db9d0ba9d
parent2df173d9e85d9e2c6a8933c63f0c034accff7e0f (diff)
fadump: Convert firmware-assisted cpu state dump data into elf notes.
When registered for firmware assisted dump on powerpc, firmware preserves the registers for the active CPUs during a system crash. This patch reads the cpu register data stored in Firmware-assisted dump format (except for crashing cpu) and converts it into elf notes and updates the PT_NOTE program header accordingly. The exact register state for crashing cpu is saved to fadump crash info structure in scratch area during crash_fadump() and read during second kernel boot. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/fadump.h44
-rw-r--r--arch/powerpc/kernel/fadump.c314
-rw-r--r--arch/powerpc/kernel/setup-common.c6
-rw-r--r--arch/powerpc/kernel/traps.c3
4 files changed, 365 insertions, 2 deletions
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 9e172a5d53c..67681958e4b 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -65,6 +65,18 @@
65/* Dump status flag */ 65/* Dump status flag */
66#define FADUMP_ERROR_FLAG 0x2000 66#define FADUMP_ERROR_FLAG 0x2000
67 67
68#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
69
70#define CPU_UNKNOWN (~((u32)0))
71
72/* Utility macros */
/*
 * Advance reg_entry past the rest of the current CPU's register list:
 * step forward until the "CPUEND" entry is reached, then step once more so
 * that reg_entry points at the entry following it.
 *
 * reg_entry must be a modifiable pointer to struct fadump_reg_entry; it is
 * evaluated more than once, so do not pass an expression with side effects.
 */
#define SKIP_TO_NEXT_CPU(reg_entry)				\
({								\
	while ((reg_entry)->reg_id != REG_ID("CPUEND"))		\
		(reg_entry)++;					\
	(reg_entry)++;						\
})
79
68/* Kernel Dump section info */ 80/* Kernel Dump section info */
69struct fadump_section { 81struct fadump_section {
70 u32 request_flag; 82 u32 request_flag;
@@ -119,6 +131,9 @@ struct fw_dump {
119 unsigned long reserve_bootvar; 131 unsigned long reserve_bootvar;
120 132
121 unsigned long fadumphdr_addr; 133 unsigned long fadumphdr_addr;
134 unsigned long cpu_notes_buf;
135 unsigned long cpu_notes_buf_size;
136
122 int ibm_configure_kernel_dump; 137 int ibm_configure_kernel_dump;
123 138
124 unsigned long fadump_enabled:1; 139 unsigned long fadump_enabled:1;
@@ -143,13 +158,40 @@ static inline u64 str_to_u64(const char *str)
143 return val; 158 return val;
144} 159}
145#define STR_TO_HEX(x) str_to_u64(x) 160#define STR_TO_HEX(x) str_to_u64(x)
161#define REG_ID(x) str_to_u64(x)
146 162
147#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF") 163#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF")
164#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
165
166/* The firmware-assisted dump format.
167 *
168 * The register save area is an area in the partition's memory used to preserve
169 * the register contents (CPU state data) for the active CPUs during a firmware
170 * assisted dump. The dump format contains register save area header followed
171 * by register entries. Each list of registers for a CPU starts with
172 * "CPUSTRT" and ends with "CPUEND".
173 */
174
175/* Register save area header. */
/*
 * Header located at the start of the register save area. It is parsed by
 * fadump_build_cpu_notes() before the per-cpu register entries are read.
 */
176struct fadump_reg_save_area_header {
/* Compared against REGSAVE_AREA_MAGIC to validate the save area. */
177 u64 magic_number;
178 u32 version;
/* Byte offset from the start of this header to the u32 cpu count. */
179 u32 num_cpu_offset;
180};
181
182/* Register entry. */
/*
 * One identifier/value pair of the register save area. Entries are walked
 * sequentially; "CPUSTRT" and "CPUEND" identifiers delimit the entries
 * belonging to one cpu.
 */
183struct fadump_reg_entry {
/* Matched against REG_ID("...") values such as "CPUSTRT", "NIA", "MSR". */
184 u64 reg_id;
/*
 * Value for reg_id. For a "CPUSTRT" entry the lower 32 bits
 * (FADUMP_CPU_ID_MASK) are taken as the logical cpu id.
 */
185 u64 reg_value;
186};
148 187
149/* fadump crash info structure */ 188/* fadump crash info structure */
/*
 * Crash information shared between the crashing kernel and the kernel that
 * boots afterwards: initialised by init_fadump_header(), completed by
 * crash_fadump() and read back by fadump_build_cpu_notes().
 */
150struct fadump_crash_info_header { 189struct fadump_crash_info_header {
/* Set to FADUMP_CRASH_INFO_MAGIC by init_fadump_header(). */
151 u64 magic_number; 190 u64 magic_number;
/* Address of the ELF core header, set by init_fadump_header(). */
152 u64 elfcorehdr_addr; 191 u64 elfcorehdr_addr;
/* CPU_UNKNOWN until crash_fadump() stores smp_processor_id() here. */
192 u32 crashing_cpu;
/* Register state of the crashing cpu, stored by crash_fadump(). */
193 struct pt_regs regs;
/* Copy of *cpu_online_mask taken by crash_fadump(). */
194 struct cpumask cpu_online_mask;
153}; 195};
154 196
155/* Crash memory ranges */ 197/* Crash memory ranges */
@@ -165,7 +207,9 @@ extern int early_init_dt_scan_fw_dump(unsigned long node,
165extern int fadump_reserve_mem(void); 207extern int fadump_reserve_mem(void);
166extern int setup_fadump(void); 208extern int setup_fadump(void);
167extern int is_fadump_active(void); 209extern int is_fadump_active(void);
210extern void crash_fadump(struct pt_regs *, const char *);
168#else /* CONFIG_FA_DUMP */ 211#else /* CONFIG_FA_DUMP */
169static inline int is_fadump_active(void) { return 0; } 212static inline int is_fadump_active(void) { return 0; }
213static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
170#endif 214#endif
171#endif 215#endif
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 63857e183de..da68bdad194 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -240,6 +240,7 @@ static unsigned long get_fadump_area_size(void)
240 size += fw_dump.boot_memory_size; 240 size += fw_dump.boot_memory_size;
241 size += sizeof(struct fadump_crash_info_header); 241 size += sizeof(struct fadump_crash_info_header);
242 size += sizeof(struct elfhdr); /* ELF core header.*/ 242 size += sizeof(struct elfhdr); /* ELF core header.*/
243 size += sizeof(struct elf_phdr); /* place holder for cpu notes */
243 /* Program headers for crash memory regions. */ 244 /* Program headers for crash memory regions. */
244 size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); 245 size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
245 246
@@ -393,6 +394,285 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
393 } 394 }
394} 395}
395 396
397void crash_fadump(struct pt_regs *regs, const char *str)
398{
399 struct fadump_crash_info_header *fdh = NULL;
400
401 if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
402 return;
403
404 fdh = __va(fw_dump.fadumphdr_addr);
405 crashing_cpu = smp_processor_id();
406 fdh->crashing_cpu = crashing_cpu;
407 crash_save_vmcoreinfo();
408
409 if (regs)
410 fdh->regs = *regs;
411 else
412 ppc_save_regs(&fdh->regs);
413
414 fdh->cpu_online_mask = *cpu_online_mask;
415
416 /* Call ibm,os-term rtas call to trigger firmware assisted dump */
417 rtas_os_term((char *)str);
418}
419
420#define GPR_MASK 0xffffff0000000000
421static inline int fadump_gpr_index(u64 id)
422{
423 int i = -1;
424 char str[3];
425
426 if ((id & GPR_MASK) == REG_ID("GPR")) {
427 /* get the digits at the end */
428 id &= ~GPR_MASK;
429 id >>= 24;
430 str[2] = '\0';
431 str[1] = id & 0xff;
432 str[0] = (id >> 8) & 0xff;
433 sscanf(str, "%d", &i);
434 if (i > 31)
435 i = -1;
436 }
437 return i;
438}
439
440static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
441 u64 reg_val)
442{
443 int i;
444
445 i = fadump_gpr_index(reg_id);
446 if (i >= 0)
447 regs->gpr[i] = (unsigned long)reg_val;
448 else if (reg_id == REG_ID("NIA"))
449 regs->nip = (unsigned long)reg_val;
450 else if (reg_id == REG_ID("MSR"))
451 regs->msr = (unsigned long)reg_val;
452 else if (reg_id == REG_ID("CTR"))
453 regs->ctr = (unsigned long)reg_val;
454 else if (reg_id == REG_ID("LR"))
455 regs->link = (unsigned long)reg_val;
456 else if (reg_id == REG_ID("XER"))
457 regs->xer = (unsigned long)reg_val;
458 else if (reg_id == REG_ID("CR"))
459 regs->ccr = (unsigned long)reg_val;
460 else if (reg_id == REG_ID("DAR"))
461 regs->dar = (unsigned long)reg_val;
462 else if (reg_id == REG_ID("DSISR"))
463 regs->dsisr = (unsigned long)reg_val;
464}
465
466static struct fadump_reg_entry*
467fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
468{
469 memset(regs, 0, sizeof(struct pt_regs));
470
471 while (reg_entry->reg_id != REG_ID("CPUEND")) {
472 fadump_set_regval(regs, reg_entry->reg_id,
473 reg_entry->reg_value);
474 reg_entry++;
475 }
476 reg_entry++;
477 return reg_entry;
478}
479
480static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
481 void *data, size_t data_len)
482{
483 struct elf_note note;
484
485 note.n_namesz = strlen(name) + 1;
486 note.n_descsz = data_len;
487 note.n_type = type;
488 memcpy(buf, &note, sizeof(note));
489 buf += (sizeof(note) + 3)/4;
490 memcpy(buf, name, note.n_namesz);
491 buf += (note.n_namesz + 3)/4;
492 memcpy(buf, data, note.n_descsz);
493 buf += (note.n_descsz + 3)/4;
494
495 return buf;
496}
497
498static void fadump_final_note(u32 *buf)
499{
500 struct elf_note note;
501
502 note.n_namesz = 0;
503 note.n_descsz = 0;
504 note.n_type = 0;
505 memcpy(buf, &note, sizeof(note));
506}
507
508static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
509{
510 struct elf_prstatus prstatus;
511
512 memset(&prstatus, 0, sizeof(prstatus));
513 /*
514 * FIXME: How do i get PID? Do I really need it?
515 * prstatus.pr_pid = ????
516 */
517 elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
518 buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
519 &prstatus, sizeof(prstatus));
520 return buf;
521}
522
523static void fadump_update_elfcore_header(char *bufp)
524{
525 struct elfhdr *elf;
526 struct elf_phdr *phdr;
527
528 elf = (struct elfhdr *)bufp;
529 bufp += sizeof(struct elfhdr);
530
531 /* First note is a place holder for cpu notes info. */
532 phdr = (struct elf_phdr *)bufp;
533
534 if (phdr->p_type == PT_NOTE) {
535 phdr->p_paddr = fw_dump.cpu_notes_buf;
536 phdr->p_offset = phdr->p_paddr;
537 phdr->p_filesz = fw_dump.cpu_notes_buf_size;
538 phdr->p_memsz = fw_dump.cpu_notes_buf_size;
539 }
540 return;
541}
542
543static void *fadump_cpu_notes_buf_alloc(unsigned long size)
544{
545 void *vaddr;
546 struct page *page;
547 unsigned long order, count, i;
548
549 order = get_order(size);
550 vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
551 if (!vaddr)
552 return NULL;
553
554 count = 1 << order;
555 page = virt_to_page(vaddr);
556 for (i = 0; i < count; i++)
557 SetPageReserved(page + i);
558 return vaddr;
559}
560
561static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
562{
563 struct page *page;
564 unsigned long order, count, i;
565
566 order = get_order(size);
567 count = 1 << order;
568 page = virt_to_page(vaddr);
569 for (i = 0; i < count; i++)
570 ClearPageReserved(page + i);
571 __free_pages(page, order);
572}
573
574/*
575 * Read CPU state dump data and convert it into ELF notes.
576 * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
577 * used to access the data to allow for additional fields to be added without
578 * affecting compatibility. Each list of registers for a CPU starts with
579 * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
580 * 8 Byte ASCII identifier and 8 Byte register value. The register entry
581 * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
582 * of register value. For more details refer to PAPR document.
583 *
584 * Only for the crashing cpu we ignore the CPU dump data and get exact
585 * state from fadump crash info structure populated by first kernel at the
586 * time of crash.
587 */
588static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
589{
590 struct fadump_reg_save_area_header *reg_header;
591 struct fadump_reg_entry *reg_entry;
592 struct fadump_crash_info_header *fdh = NULL;
593 void *vaddr;
594 unsigned long addr;
595 u32 num_cpus, *note_buf;
596 struct pt_regs regs;
597 int i, rc = 0, cpu = 0;
598
599 if (!fdm->cpu_state_data.bytes_dumped)
600 return -EINVAL;
601
602 addr = fdm->cpu_state_data.destination_address;
603 vaddr = __va(addr);
604
605 reg_header = vaddr;
606 if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
607 printk(KERN_ERR "Unable to read register save area.\n");
608 return -ENOENT;
609 }
610 pr_debug("--------CPU State Data------------\n");
611 pr_debug("Magic Number: %llx\n", reg_header->magic_number);
612 pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
613
614 vaddr += reg_header->num_cpu_offset;
615 num_cpus = *((u32 *)(vaddr));
616 pr_debug("NumCpus : %u\n", num_cpus);
617 vaddr += sizeof(u32);
618 reg_entry = (struct fadump_reg_entry *)vaddr;
619
620 /* Allocate buffer to hold cpu crash notes. */
621 fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
622 fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
623 note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
624 if (!note_buf) {
625 printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
626 "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
627 return -ENOMEM;
628 }
629 fw_dump.cpu_notes_buf = __pa(note_buf);
630
631 pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
632 (num_cpus * sizeof(note_buf_t)), note_buf);
633
634 if (fw_dump.fadumphdr_addr)
635 fdh = __va(fw_dump.fadumphdr_addr);
636
637 for (i = 0; i < num_cpus; i++) {
638 if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
639 printk(KERN_ERR "Unable to read CPU state data\n");
640 rc = -ENOENT;
641 goto error_out;
642 }
643 /* Lower 4 bytes of reg_value contains logical cpu id */
644 cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
645 if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
646 SKIP_TO_NEXT_CPU(reg_entry);
647 continue;
648 }
649 pr_debug("Reading register data for cpu %d...\n", cpu);
650 if (fdh && fdh->crashing_cpu == cpu) {
651 regs = fdh->regs;
652 note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
653 SKIP_TO_NEXT_CPU(reg_entry);
654 } else {
655 reg_entry++;
656 reg_entry = fadump_read_registers(reg_entry, &regs);
657 note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
658 }
659 }
660 fadump_final_note(note_buf);
661
662 pr_debug("Updating elfcore header (%llx) with cpu notes\n",
663 fdh->elfcorehdr_addr);
664 fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
665 return 0;
666
667error_out:
668 fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
669 fw_dump.cpu_notes_buf_size);
670 fw_dump.cpu_notes_buf = 0;
671 fw_dump.cpu_notes_buf_size = 0;
672 return rc;
673
674}
675
396/* 676/*
397 * Validate and process the dump data stored by firmware before exporting 677 * Validate and process the dump data stored by firmware before exporting
398 * it through '/proc/vmcore'. 678 * it through '/proc/vmcore'.
@@ -400,18 +680,21 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
400static int __init process_fadump(const struct fadump_mem_struct *fdm_active) 680static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
401{ 681{
402 struct fadump_crash_info_header *fdh; 682 struct fadump_crash_info_header *fdh;
683 int rc = 0;
403 684
404 if (!fdm_active || !fw_dump.fadumphdr_addr) 685 if (!fdm_active || !fw_dump.fadumphdr_addr)
405 return -EINVAL; 686 return -EINVAL;
406 687
407 /* Check if the dump data is valid. */ 688 /* Check if the dump data is valid. */
408 if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) || 689 if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
690 (fdm_active->cpu_state_data.error_flags != 0) ||
409 (fdm_active->rmr_region.error_flags != 0)) { 691 (fdm_active->rmr_region.error_flags != 0)) {
410 printk(KERN_ERR "Dump taken by platform is not valid\n"); 692 printk(KERN_ERR "Dump taken by platform is not valid\n");
411 return -EINVAL; 693 return -EINVAL;
412 } 694 }
413 if (fdm_active->rmr_region.bytes_dumped != 695 if ((fdm_active->rmr_region.bytes_dumped !=
414 fdm_active->rmr_region.source_len) { 696 fdm_active->rmr_region.source_len) ||
697 !fdm_active->cpu_state_data.bytes_dumped) {
415 printk(KERN_ERR "Dump taken by platform is incomplete\n"); 698 printk(KERN_ERR "Dump taken by platform is incomplete\n");
416 return -EINVAL; 699 return -EINVAL;
417 } 700 }
@@ -423,6 +706,10 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
423 return -EINVAL; 706 return -EINVAL;
424 } 707 }
425 708
709 rc = fadump_build_cpu_notes(fdm_active);
710 if (rc)
711 return rc;
712
426 /* 713 /*
427 * We are done validating dump info and elfcore header is now ready 714 * We are done validating dump info and elfcore header is now ready
428 * to be exported. set elfcorehdr_addr so that vmcore module will 715 * to be exported. set elfcorehdr_addr so that vmcore module will
@@ -537,6 +824,27 @@ static int fadump_create_elfcore_headers(char *bufp)
537 elf = (struct elfhdr *)bufp; 824 elf = (struct elfhdr *)bufp;
538 bufp += sizeof(struct elfhdr); 825 bufp += sizeof(struct elfhdr);
539 826
827 /*
828 * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
829 * will be populated during second kernel boot after crash. Hence
830 * this PT_NOTE will always be the first elf note.
831 *
832 * NOTE: Any new ELF note addition should be placed after this note.
833 */
834 phdr = (struct elf_phdr *)bufp;
835 bufp += sizeof(struct elf_phdr);
836 phdr->p_type = PT_NOTE;
837 phdr->p_flags = 0;
838 phdr->p_vaddr = 0;
839 phdr->p_align = 0;
840
841 phdr->p_offset = 0;
842 phdr->p_paddr = 0;
843 phdr->p_filesz = 0;
844 phdr->p_memsz = 0;
845
846 (elf->e_phnum)++;
847
540 /* setup PT_LOAD sections. */ 848 /* setup PT_LOAD sections. */
541 849
542 for (i = 0; i < crash_mem_ranges; i++) { 850 for (i = 0; i < crash_mem_ranges; i++) {
@@ -588,6 +896,8 @@ static unsigned long init_fadump_header(unsigned long addr)
588 memset(fdh, 0, sizeof(struct fadump_crash_info_header)); 896 memset(fdh, 0, sizeof(struct fadump_crash_info_header));
589 fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; 897 fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
590 fdh->elfcorehdr_addr = addr; 898 fdh->elfcorehdr_addr = addr;
899 /* We will set the crashing cpu id in crash_fadump() during crash. */
900 fdh->crashing_cpu = CPU_UNKNOWN;
591 901
592 return addr; 902 return addr;
593} 903}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 77bb77da05c..4e62a56e1a9 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -61,6 +61,7 @@
61#include <asm/xmon.h> 61#include <asm/xmon.h>
62#include <asm/cputhreads.h> 62#include <asm/cputhreads.h>
63#include <mm/mmu_decl.h> 63#include <mm/mmu_decl.h>
64#include <asm/fadump.h>
64 65
65#include "setup.h" 66#include "setup.h"
66 67
@@ -639,6 +640,11 @@ EXPORT_SYMBOL(check_legacy_ioport);
/*
 * Panic notifier callback: gives crash_fadump() the first chance to act on
 * the panic (passing NULL for the registers and ptr as the message), then
 * calls the platform panic hook. Returns NOTIFY_DONE if both calls return.
 */
639static int ppc_panic_event(struct notifier_block *this, 640static int ppc_panic_event(struct notifier_block *this,
640 unsigned long event, void *ptr) 641 unsigned long event, void *ptr)
641{ 642{
643 /*
644 * If firmware-assisted dump has been registered then trigger
645 * firmware-assisted dump and let firmware handle everything else.
646 */
647 crash_fadump(NULL, ptr);
642 ppc_md.panic(ptr); /* May not return */ 648 ppc_md.panic(ptr); /* May not return */
643 return NOTIFY_DONE; 649 return NOTIFY_DONE;
644} 650}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index c091527efd8..5d40e592ffc 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -57,6 +57,7 @@
57#include <asm/kexec.h> 57#include <asm/kexec.h>
58#include <asm/ppc-opcode.h> 58#include <asm/ppc-opcode.h>
59#include <asm/rio.h> 59#include <asm/rio.h>
60#include <asm/fadump.h>
60 61
61#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) 62#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
62int (*__debugger)(struct pt_regs *regs) __read_mostly; 63int (*__debugger)(struct pt_regs *regs) __read_mostly;
@@ -145,6 +146,8 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
145 arch_spin_unlock(&die_lock); 146 arch_spin_unlock(&die_lock);
146 raw_local_irq_restore(flags); 147 raw_local_irq_restore(flags);
147 148
149 crash_fadump(regs, "die oops");
150
148 /* 151 /*
149 * A system reset (0x100) is a request to dump, so we always send 152 * A system reset (0x100) is a request to dump, so we always send
150 * it through the crashdump code. 153 * it through the crashdump code.