aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 2012-02-15 20:14:22 -0500
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2012-02-22 18:50:01 -0500
commit: eb39c8803d0e3d98fe74825f99287f63d55e6460 (patch)
tree: cf157bb2bb5271aaf1823e9ee3b91800c00c8b05
parent: 8e0aa6d436f303a37df7ec68758883ade077d123 (diff)
fadump: Reserve the memory for firmware assisted dump.
Reserve the memory during early boot to preserve CPU state data, HPTE region and RMA (real mode area) region data in case of kernel crash. At the time of crash, powerpc firmware will store CPU state data, HPTE region data and move RMA region data to the reserved memory area. If the firmware-assisted dump fails to reserve the memory, then fall back to the existing kexec-based kdump. Most of the code implementation to reserve memory has been adapted from the phyp assisted dump implementation written by Linas Vepstas and Manish Ahuja. This patch also introduces a config option CONFIG_FA_DUMP for the firmware assisted dump feature on the Powerpc (ppc64) architecture. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- arch/powerpc/Kconfig 13
-rw-r--r-- arch/powerpc/include/asm/fadump.h 71
-rw-r--r-- arch/powerpc/kernel/Makefile 1
-rw-r--r-- arch/powerpc/kernel/fadump.c 246
-rw-r--r-- arch/powerpc/kernel/prom.c 15
5 files changed, 345 insertions, 1 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1919634a9b32..afa4dabfad7c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -386,6 +386,19 @@ config PHYP_DUMP
386 386
387 If unsure, say "N" 387 If unsure, say "N"
388 388
389config FA_DUMP
390 bool "Firmware-assisted dump"
391 depends on PPC64 && PPC_RTAS && CRASH_DUMP
392 help
393 A robust mechanism to get reliable kernel crash dump with
394 assistance from firmware. This approach does not use kexec,
395 instead firmware assists in booting the kdump kernel
396 while preserving memory contents. Firmware-assisted dump
397 is meant to be a kdump replacement offering robustness and
398 speed not possible without system firmware assistance.
399
400 If unsure, say "N"
401
389config IRQ_ALL_CPUS 402config IRQ_ALL_CPUS
390 bool "Distribute interrupts on all CPUs by default" 403 bool "Distribute interrupts on all CPUs by default"
391 depends on SMP && !MV64360 404 depends on SMP && !MV64360
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
new file mode 100644
index 000000000000..7be25d30d985
--- /dev/null
+++ b/arch/powerpc/include/asm/fadump.h
@@ -0,0 +1,71 @@
1/*
2 * Firmware Assisted dump header file.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright 2011 IBM Corporation
19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20 */
21
22#ifndef __PPC64_FA_DUMP_H__
23#define __PPC64_FA_DUMP_H__
24
25#ifdef CONFIG_FA_DUMP
26
27/*
28 * The RMA region will be saved for later dumping when the kernel crashes.
29 * RMA is the Real Mode Area: the first block of logical memory addresses
30 * owned by the logical partition, containing the storage that may be
31 * accessed with translation off. RMA_END expands to ppc64_rma_size.
32 */
33#define RMA_START 0x0
34#define RMA_END (ppc64_rma_size)
35
36/*
37 * On some Power systems where RMO is 128MB, the kernel still requires a
38 * minimum of 256MB to boot successfully. When the kdump infrastructure is
39 * configured to save vmcore over the network, we run into OOM issues while
40 * loading modules related to network setup. Hence MIN_BOOT_MEM is the larger
41 * of RMA_END and 256MB, plus an additional 64MB of memory to avoid the OOM.
42 */
43#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
44 + (0x1UL << 26))
45
46/* IDs of the firmware-provided dump sections (first cell of each "ibm,configure-kernel-dump-sizes" entry) */
47#define FADUMP_CPU_STATE_DATA 0x0001
48#define FADUMP_HPTE_REGION 0x0002
49#define FADUMP_REAL_MODE_REGION 0x0011
50
/* Firmware-assisted dump state collected during early boot (device tree scan, cmdline, reservation). */
51struct fw_dump {
52 unsigned long cpu_state_data_size; /* bytes for the CPU state data section, from "ibm,configure-kernel-dump-sizes" */
53 unsigned long hpte_region_size; /* bytes for the HPTE region section, from "ibm,configure-kernel-dump-sizes" */
54 unsigned long boot_memory_size; /* set by fadump_reserve_mem() from fadump_calculate_reserve_size() */
55 unsigned long reserve_dump_area_start; /* base of the memblock reservation made by fadump_reserve_mem() */
56 unsigned long reserve_dump_area_size; /* size of that reservation */
57 /* value of the fadump_reserve_mem= cmdline option; 0 means "not specified" */
58 unsigned long reserve_bootvar;
59
60 int ibm_configure_kernel_dump; /* value of the rtas "ibm,configure-kernel-dump" property (presumably the RTAS call token - confirm) */
61
62 unsigned long fadump_enabled:1; /* set/cleared by fadump=on|off; cleared when unsupported */
63 unsigned long fadump_supported:1; /* "ibm,configure-kernel-dump" property was found */
64 unsigned long dump_active:1; /* "ibm,kernel-dump" property was found: dump data is waiting */
65};
66
67extern int early_init_dt_scan_fw_dump(unsigned long node,
68 const char *uname, int depth, void *data);
69extern int fadump_reserve_mem(void);
70#endif
71#endif
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ee728e433aa2..391bf7e1ba2f 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -60,6 +60,7 @@ obj-$(CONFIG_IBMVIO) += vio.o
60obj-$(CONFIG_IBMEBUS) += ibmebus.o 60obj-$(CONFIG_IBMEBUS) += ibmebus.o
61obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o 61obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
62obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 62obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
63obj-$(CONFIG_FA_DUMP) += fadump.o
63ifeq ($(CONFIG_PPC32),y) 64ifeq ($(CONFIG_PPC32),y)
64obj-$(CONFIG_E500) += idle_e500.o 65obj-$(CONFIG_E500) += idle_e500.o
65endif 66endif
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
new file mode 100644
index 000000000000..deb276a9ce71
--- /dev/null
+++ b/arch/powerpc/kernel/fadump.c
@@ -0,0 +1,246 @@
1/*
2 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
3 * dump with assistance from firmware. This approach does not use kexec,
4 * instead firmware assists in booting the kdump kernel while preserving
5 * memory contents. The most of the code implementation has been adapted
6 * from phyp assisted dump implementation written by Linas Vepstas and
7 * Manish Ahuja
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 *
23 * Copyright 2011 IBM Corporation
24 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
25 */
26
27#undef DEBUG
28#define pr_fmt(fmt) "fadump: " fmt
29
30#include <linux/string.h>
31#include <linux/memblock.h>
32
33#include <asm/page.h>
34#include <asm/prom.h>
35#include <asm/rtas.h>
36#include <asm/fadump.h>
37
38static struct fw_dump fw_dump;
39
40/* Scan the Firmware Assisted dump configuration details. */
41int __init early_init_dt_scan_fw_dump(unsigned long node,
42 const char *uname, int depth, void *data)
43{
44 __be32 *sections;
45 int i, num_sections;
46 unsigned long size;
47 const int *token;
48
49 if (depth != 1 || strcmp(uname, "rtas") != 0)
50 return 0;
51
52 /*
53 * Check if Firmware Assisted dump is supported. if yes, check
54 * if dump has been initiated on last reboot.
55 */
56 token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
57 if (!token)
58 return 0;
59
60 fw_dump.fadump_supported = 1;
61 fw_dump.ibm_configure_kernel_dump = *token;
62
63 /*
64 * The 'ibm,kernel-dump' rtas node is present only if there is
65 * dump data waiting for us.
66 */
67 if (of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL))
68 fw_dump.dump_active = 1;
69
70 /* Get the sizes required to store dump data for the firmware provided
71 * dump sections.
72 * For each dump section type supported, a 32bit cell which defines
73 * the ID of a supported section followed by two 32 bit cells which
74 * gives teh size of the section in bytes.
75 */
76 sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
77 &size);
78
79 if (!sections)
80 return 0;
81
82 num_sections = size / (3 * sizeof(u32));
83
84 for (i = 0; i < num_sections; i++, sections += 3) {
85 u32 type = (u32)of_read_number(sections, 1);
86
87 switch (type) {
88 case FADUMP_CPU_STATE_DATA:
89 fw_dump.cpu_state_data_size =
90 of_read_ulong(&sections[1], 2);
91 break;
92 case FADUMP_HPTE_REGION:
93 fw_dump.hpte_region_size =
94 of_read_ulong(&sections[1], 2);
95 break;
96 }
97 }
98 return 1;
99}
100
101/**
102 * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
103 *
104 * Function to find the largest memory size we need to reserve during early
105 * boot process. This will be the size of the memory that is required for a
106 * kernel to boot successfully.
107 *
108 * This function has been taken from phyp-assisted dump feature implementation.
109 *
110 * returns larger of 256MB or 5% rounded down to multiples of 256MB.
111 *
112 * TODO: Come up with better approach to find out more accurate memory size
113 * that is required for a kernel to boot successfully.
114 *
115 */
116static inline unsigned long fadump_calculate_reserve_size(void)
117{
118 unsigned long size;
119
120 /*
121 * Check if the size is specified through fadump_reserve_mem= cmdline
122 * option. If yes, then use that.
123 */
124 if (fw_dump.reserve_bootvar)
125 return fw_dump.reserve_bootvar;
126
127 /* divide by 20 to get 5% of value */
128 size = memblock_end_of_DRAM() / 20;
129
130 /* round it down in multiples of 256 */
131 size = size & ~0x0FFFFFFFUL;
132
133 /* Truncate to memory_limit. We don't want to over reserve the memory.*/
134 if (memory_limit && size > memory_limit)
135 size = memory_limit;
136
137 return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
138}
139
140/*
141 * Calculate the total memory size required to be reserved for
142 * firmware-assisted dump registration.
143 */
144static unsigned long get_fadump_area_size(void)
145{
146 unsigned long size = 0;
147
148 size += fw_dump.cpu_state_data_size;
149 size += fw_dump.hpte_region_size;
150 size += fw_dump.boot_memory_size;
151
152 size = PAGE_ALIGN(size);
153 return size;
154}
155
156int __init fadump_reserve_mem(void)
157{
158 unsigned long base, size, memory_boundary;
159
160 if (!fw_dump.fadump_enabled)
161 return 0;
162
163 if (!fw_dump.fadump_supported) {
164 printk(KERN_INFO "Firmware-assisted dump is not supported on"
165 " this hardware\n");
166 fw_dump.fadump_enabled = 0;
167 return 0;
168 }
169 /* Initialize boot memory size */
170 fw_dump.boot_memory_size = fadump_calculate_reserve_size();
171
172 /*
173 * Calculate the memory boundary.
174 * If memory_limit is less than actual memory boundary then reserve
175 * the memory for fadump beyond the memory_limit and adjust the
176 * memory_limit accordingly, so that the running kernel can run with
177 * specified memory_limit.
178 */
179 if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
180 size = get_fadump_area_size();
181 if ((memory_limit + size) < memblock_end_of_DRAM())
182 memory_limit += size;
183 else
184 memory_limit = memblock_end_of_DRAM();
185 printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
186 " dump, now %#016llx\n",
187 (unsigned long long)memory_limit);
188 }
189 if (memory_limit)
190 memory_boundary = memory_limit;
191 else
192 memory_boundary = memblock_end_of_DRAM();
193
194 if (fw_dump.dump_active) {
195 printk(KERN_INFO "Firmware-assisted dump is active.\n");
196 /*
197 * If last boot has crashed then reserve all the memory
198 * above boot_memory_size so that we don't touch it until
199 * dump is written to disk by userspace tool. This memory
200 * will be released for general use once the dump is saved.
201 */
202 base = fw_dump.boot_memory_size;
203 size = memory_boundary - base;
204 memblock_reserve(base, size);
205 printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
206 "for saving crash dump\n",
207 (unsigned long)(size >> 20),
208 (unsigned long)(base >> 20));
209 } else {
210 /* Reserve the memory at the top of memory. */
211 size = get_fadump_area_size();
212 base = memory_boundary - size;
213 memblock_reserve(base, size);
214 printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
215 "for firmware-assisted dump\n",
216 (unsigned long)(size >> 20),
217 (unsigned long)(base >> 20));
218 }
219 fw_dump.reserve_dump_area_start = base;
220 fw_dump.reserve_dump_area_size = size;
221 return 1;
222}
223
224/* Look for fadump= cmdline option. */
225static int __init early_fadump_param(char *p)
226{
227 if (!p)
228 return 1;
229
230 if (strncmp(p, "on", 2) == 0)
231 fw_dump.fadump_enabled = 1;
232 else if (strncmp(p, "off", 3) == 0)
233 fw_dump.fadump_enabled = 0;
234
235 return 0;
236}
237early_param("fadump", early_fadump_param);
238
239/* Look for fadump_reserve_mem= cmdline option */
240static int __init early_fadump_reserve_mem(char *p)
241{
242 if (p)
243 fw_dump.reserve_bootvar = memparse(p, &p);
244 return 0;
245}
246early_param("fadump_reserve_mem", early_fadump_reserve_mem);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index abe405dab34d..70222b35cfc5 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -55,6 +55,7 @@
55#include <asm/phyp_dump.h> 55#include <asm/phyp_dump.h>
56#include <asm/kexec.h> 56#include <asm/kexec.h>
57#include <asm/opal.h> 57#include <asm/opal.h>
58#include <asm/fadump.h>
58 59
59#include <mm/mmu_decl.h> 60#include <mm/mmu_decl.h>
60 61
@@ -719,6 +720,11 @@ void __init early_init_devtree(void *params)
719 of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); 720 of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL);
720#endif 721#endif
721 722
723#ifdef CONFIG_FA_DUMP
724 /* scan tree to see if dump is active during last boot */
725 of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
726#endif
727
722 /* Pre-initialize the cmd_line with the content of boot_commmand_line, 728 /* Pre-initialize the cmd_line with the content of boot_commmand_line,
723 * which will be empty except when the content of the variable has 729 * which will be empty except when the content of the variable has
724 * been overriden by a bootloading mechanism. This happens typically 730 * been overriden by a bootloading mechanism. This happens typically
@@ -750,7 +756,14 @@ void __init early_init_devtree(void *params)
750 if (PHYSICAL_START > MEMORY_START) 756 if (PHYSICAL_START > MEMORY_START)
751 memblock_reserve(MEMORY_START, 0x8000); 757 memblock_reserve(MEMORY_START, 0x8000);
752 reserve_kdump_trampoline(); 758 reserve_kdump_trampoline();
753 reserve_crashkernel(); 759#ifdef CONFIG_FA_DUMP
760 /*
761 * If we fail to reserve memory for firmware-assisted dump then
762 * fallback to kexec based kdump.
763 */
764 if (fadump_reserve_mem() == 0)
765#endif
766 reserve_crashkernel();
754 early_reserve_mem(); 767 early_reserve_mem();
755 phyp_dump_reserve_mem(); 768 phyp_dump_reserve_mem();
756 769