diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/Kconfig | 13 | ||||
-rw-r--r-- | arch/powerpc/include/asm/fadump.h | 71 | ||||
-rw-r--r-- | arch/powerpc/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/fadump.c | 246 | ||||
-rw-r--r-- | arch/powerpc/kernel/prom.c | 15 |
5 files changed, 345 insertions, 1 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1919634a9b32..afa4dabfad7c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig | |||
@@ -386,6 +386,19 @@ config PHYP_DUMP | |||
386 | 386 | ||
387 | If unsure, say "N" | 387 | If unsure, say "N" |
388 | 388 | ||
389 | config FA_DUMP | ||
390 | bool "Firmware-assisted dump" | ||
391 | depends on PPC64 && PPC_RTAS && CRASH_DUMP | ||
392 | help | ||
393 | A robust mechanism to get reliable kernel crash dump with | ||
394 | assistance from firmware. This approach does not use kexec, | ||
395 | instead firmware assists in booting the kdump kernel | ||
396 | while preserving memory contents. Firmware-assisted dump | ||
397 | is meant to be a kdump replacement offering robustness and | ||
398 | speed not possible without system firmware assistance. | ||
399 | |||
400 | If unsure, say "N" | ||
401 | |||
389 | config IRQ_ALL_CPUS | 402 | config IRQ_ALL_CPUS |
390 | bool "Distribute interrupts on all CPUs by default" | 403 | bool "Distribute interrupts on all CPUs by default" |
391 | depends on SMP && !MV64360 | 404 | depends on SMP && !MV64360 |
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h new file mode 100644 index 000000000000..7be25d30d985 --- /dev/null +++ b/arch/powerpc/include/asm/fadump.h | |||
@@ -0,0 +1,71 @@ | |||
1 | /* | ||
2 | * Firmware Assisted dump header file. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright 2011 IBM Corporation | ||
19 | * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | ||
20 | */ | ||
21 | |||
22 | #ifndef __PPC64_FA_DUMP_H__ | ||
23 | #define __PPC64_FA_DUMP_H__ | ||
24 | |||
25 | #ifdef CONFIG_FA_DUMP | ||
26 | |||
27 | /* | ||
28 | * The RMA region will be saved for later dumping when kernel crashes. | ||
29 | * RMA is Real Mode Area, the first block of logical memory address owned | ||
30 | * by logical partition, containing the storage that may be accessed with | ||
31 | * translate off. | ||
32 | */ | ||
33 | #define RMA_START 0x0 | ||
34 | #define RMA_END (ppc64_rma_size) | ||
35 | |||
36 | /* | ||
37 | * On some Power systems where RMO is 128MB, it still requires minimum of | ||
38 | * 256MB for kernel to boot successfully. When kdump infrastructure is | ||
39 | * configured to save vmcore over network, we run into OOM issue while | ||
40 | * loading modules related to network setup. Hence we need additional 64M | ||
41 | * of memory to avoid OOM issue. | ||
42 | */ | ||
43 | #define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \ | ||
44 | + (0x1UL << 26)) | ||
45 | |||
46 | /* Firmware provided dump sections */ | ||
47 | #define FADUMP_CPU_STATE_DATA 0x0001 | ||
48 | #define FADUMP_HPTE_REGION 0x0002 | ||
49 | #define FADUMP_REAL_MODE_REGION 0x0011 | ||
50 | |||
/*
 * struct fw_dump - firmware-assisted dump state collected during early boot.
 *
 * cpu_state_data_size / hpte_region_size:
 *	sizes of the CPU state data and HPTE region dump sections, filled in
 *	by early_init_dt_scan_fw_dump() from the
 *	"ibm,configure-kernel-dump-sizes" device tree property.
 * boot_memory_size:
 *	result of fadump_calculate_reserve_size(), stored by
 *	fadump_reserve_mem().
 * reserve_dump_area_start / reserve_dump_area_size:
 *	base and size of the range handed to memblock_reserve() by
 *	fadump_reserve_mem().
 * reserve_bootvar:
 *	value of the fadump_reserve_mem= command line option (0 if not given).
 * ibm_configure_kernel_dump:
 *	first cell of the "ibm,configure-kernel-dump" property
 *	(presumably the RTAS call token -- TODO confirm).
 * fadump_enabled:
 *	set/cleared by the fadump=on|off command line option; also cleared
 *	when the firmware does not support the feature.
 * fadump_supported:
 *	set when the "ibm,configure-kernel-dump" property is present.
 * dump_active:
 *	set when the "ibm,kernel-dump" property is present.
 */
51 | struct fw_dump { | ||
52 | unsigned long cpu_state_data_size; | ||
53 | unsigned long hpte_region_size; | ||
54 | unsigned long boot_memory_size; | ||
55 | unsigned long reserve_dump_area_start; | ||
56 | unsigned long reserve_dump_area_size; | ||
57 | /* cmd line option during boot */ | ||
58 | unsigned long reserve_bootvar; | ||
59 | |||
60 | int ibm_configure_kernel_dump; | ||
61 | |||
62 | unsigned long fadump_enabled:1; | ||
63 | unsigned long fadump_supported:1; | ||
64 | unsigned long dump_active:1; | ||
65 | }; | ||
66 | |||
67 | extern int early_init_dt_scan_fw_dump(unsigned long node, | ||
68 | const char *uname, int depth, void *data); | ||
69 | extern int fadump_reserve_mem(void); | ||
70 | #endif | ||
71 | #endif | ||
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ee728e433aa2..391bf7e1ba2f 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile | |||
@@ -60,6 +60,7 @@ obj-$(CONFIG_IBMVIO) += vio.o | |||
60 | obj-$(CONFIG_IBMEBUS) += ibmebus.o | 60 | obj-$(CONFIG_IBMEBUS) += ibmebus.o |
61 | obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o | 61 | obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o |
62 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | 62 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o |
63 | obj-$(CONFIG_FA_DUMP) += fadump.o | ||
63 | ifeq ($(CONFIG_PPC32),y) | 64 | ifeq ($(CONFIG_PPC32),y) |
64 | obj-$(CONFIG_E500) += idle_e500.o | 65 | obj-$(CONFIG_E500) += idle_e500.o |
65 | endif | 66 | endif |
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c new file mode 100644 index 000000000000..deb276a9ce71 --- /dev/null +++ b/arch/powerpc/kernel/fadump.c | |||
@@ -0,0 +1,246 @@ | |||
1 | /* | ||
2 | * Firmware Assisted dump: A robust mechanism to get reliable kernel crash | ||
3 | * dump with assistance from firmware. This approach does not use kexec, | ||
4 | * instead firmware assists in booting the kdump kernel while preserving | ||
5 | * memory contents. Most of the implementation has been adapted | ||
6 | * from phyp assisted dump implementation written by Linas Vepstas and | ||
7 | * Manish Ahuja | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
22 | * | ||
23 | * Copyright 2011 IBM Corporation | ||
24 | * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | ||
25 | */ | ||
26 | |||
27 | #undef DEBUG | ||
28 | #define pr_fmt(fmt) "fadump: " fmt | ||
29 | |||
30 | #include <linux/string.h> | ||
31 | #include <linux/memblock.h> | ||
32 | |||
33 | #include <asm/page.h> | ||
34 | #include <asm/prom.h> | ||
35 | #include <asm/rtas.h> | ||
36 | #include <asm/fadump.h> | ||
37 | |||
38 | static struct fw_dump fw_dump; | ||
39 | |||
40 | /* Scan the Firmware Assisted dump configuration details. */ | ||
41 | int __init early_init_dt_scan_fw_dump(unsigned long node, | ||
42 | const char *uname, int depth, void *data) | ||
43 | { | ||
44 | __be32 *sections; | ||
45 | int i, num_sections; | ||
46 | unsigned long size; | ||
47 | const int *token; | ||
48 | |||
49 | if (depth != 1 || strcmp(uname, "rtas") != 0) | ||
50 | return 0; | ||
51 | |||
52 | /* | ||
53 | * Check if Firmware Assisted dump is supported. if yes, check | ||
54 | * if dump has been initiated on last reboot. | ||
55 | */ | ||
56 | token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL); | ||
57 | if (!token) | ||
58 | return 0; | ||
59 | |||
60 | fw_dump.fadump_supported = 1; | ||
61 | fw_dump.ibm_configure_kernel_dump = *token; | ||
62 | |||
63 | /* | ||
64 | * The 'ibm,kernel-dump' rtas node is present only if there is | ||
65 | * dump data waiting for us. | ||
66 | */ | ||
67 | if (of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL)) | ||
68 | fw_dump.dump_active = 1; | ||
69 | |||
70 | /* Get the sizes required to store dump data for the firmware provided | ||
71 | * dump sections. | ||
72 | * For each dump section type supported, a 32bit cell which defines | ||
73 | * the ID of a supported section followed by two 32 bit cells which | ||
74 | * gives teh size of the section in bytes. | ||
75 | */ | ||
76 | sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes", | ||
77 | &size); | ||
78 | |||
79 | if (!sections) | ||
80 | return 0; | ||
81 | |||
82 | num_sections = size / (3 * sizeof(u32)); | ||
83 | |||
84 | for (i = 0; i < num_sections; i++, sections += 3) { | ||
85 | u32 type = (u32)of_read_number(sections, 1); | ||
86 | |||
87 | switch (type) { | ||
88 | case FADUMP_CPU_STATE_DATA: | ||
89 | fw_dump.cpu_state_data_size = | ||
90 | of_read_ulong(§ions[1], 2); | ||
91 | break; | ||
92 | case FADUMP_HPTE_REGION: | ||
93 | fw_dump.hpte_region_size = | ||
94 | of_read_ulong(§ions[1], 2); | ||
95 | break; | ||
96 | } | ||
97 | } | ||
98 | return 1; | ||
99 | } | ||
100 | |||
101 | /** | ||
102 | * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM | ||
103 | * | ||
104 | * Function to find the largest memory size we need to reserve during early | ||
105 | * boot process. This will be the size of the memory that is required for a | ||
106 | * kernel to boot successfully. | ||
107 | * | ||
108 | * This function has been taken from phyp-assisted dump feature implementation. | ||
109 | * | ||
110 | * returns larger of 256MB or 5% rounded down to multiples of 256MB. | ||
111 | * | ||
112 | * TODO: Come up with better approach to find out more accurate memory size | ||
113 | * that is required for a kernel to boot successfully. | ||
114 | * | ||
115 | */ | ||
116 | static inline unsigned long fadump_calculate_reserve_size(void) | ||
117 | { | ||
118 | unsigned long size; | ||
119 | |||
120 | /* | ||
121 | * Check if the size is specified through fadump_reserve_mem= cmdline | ||
122 | * option. If yes, then use that. | ||
123 | */ | ||
124 | if (fw_dump.reserve_bootvar) | ||
125 | return fw_dump.reserve_bootvar; | ||
126 | |||
127 | /* divide by 20 to get 5% of value */ | ||
128 | size = memblock_end_of_DRAM() / 20; | ||
129 | |||
130 | /* round it down in multiples of 256 */ | ||
131 | size = size & ~0x0FFFFFFFUL; | ||
132 | |||
133 | /* Truncate to memory_limit. We don't want to over reserve the memory.*/ | ||
134 | if (memory_limit && size > memory_limit) | ||
135 | size = memory_limit; | ||
136 | |||
137 | return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM); | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Calculate the total memory size required to be reserved for | ||
142 | * firmware-assisted dump registration. | ||
143 | */ | ||
144 | static unsigned long get_fadump_area_size(void) | ||
145 | { | ||
146 | unsigned long size = 0; | ||
147 | |||
148 | size += fw_dump.cpu_state_data_size; | ||
149 | size += fw_dump.hpte_region_size; | ||
150 | size += fw_dump.boot_memory_size; | ||
151 | |||
152 | size = PAGE_ALIGN(size); | ||
153 | return size; | ||
154 | } | ||
155 | |||
156 | int __init fadump_reserve_mem(void) | ||
157 | { | ||
158 | unsigned long base, size, memory_boundary; | ||
159 | |||
160 | if (!fw_dump.fadump_enabled) | ||
161 | return 0; | ||
162 | |||
163 | if (!fw_dump.fadump_supported) { | ||
164 | printk(KERN_INFO "Firmware-assisted dump is not supported on" | ||
165 | " this hardware\n"); | ||
166 | fw_dump.fadump_enabled = 0; | ||
167 | return 0; | ||
168 | } | ||
169 | /* Initialize boot memory size */ | ||
170 | fw_dump.boot_memory_size = fadump_calculate_reserve_size(); | ||
171 | |||
172 | /* | ||
173 | * Calculate the memory boundary. | ||
174 | * If memory_limit is less than actual memory boundary then reserve | ||
175 | * the memory for fadump beyond the memory_limit and adjust the | ||
176 | * memory_limit accordingly, so that the running kernel can run with | ||
177 | * specified memory_limit. | ||
178 | */ | ||
179 | if (memory_limit && memory_limit < memblock_end_of_DRAM()) { | ||
180 | size = get_fadump_area_size(); | ||
181 | if ((memory_limit + size) < memblock_end_of_DRAM()) | ||
182 | memory_limit += size; | ||
183 | else | ||
184 | memory_limit = memblock_end_of_DRAM(); | ||
185 | printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" | ||
186 | " dump, now %#016llx\n", | ||
187 | (unsigned long long)memory_limit); | ||
188 | } | ||
189 | if (memory_limit) | ||
190 | memory_boundary = memory_limit; | ||
191 | else | ||
192 | memory_boundary = memblock_end_of_DRAM(); | ||
193 | |||
194 | if (fw_dump.dump_active) { | ||
195 | printk(KERN_INFO "Firmware-assisted dump is active.\n"); | ||
196 | /* | ||
197 | * If last boot has crashed then reserve all the memory | ||
198 | * above boot_memory_size so that we don't touch it until | ||
199 | * dump is written to disk by userspace tool. This memory | ||
200 | * will be released for general use once the dump is saved. | ||
201 | */ | ||
202 | base = fw_dump.boot_memory_size; | ||
203 | size = memory_boundary - base; | ||
204 | memblock_reserve(base, size); | ||
205 | printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " | ||
206 | "for saving crash dump\n", | ||
207 | (unsigned long)(size >> 20), | ||
208 | (unsigned long)(base >> 20)); | ||
209 | } else { | ||
210 | /* Reserve the memory at the top of memory. */ | ||
211 | size = get_fadump_area_size(); | ||
212 | base = memory_boundary - size; | ||
213 | memblock_reserve(base, size); | ||
214 | printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " | ||
215 | "for firmware-assisted dump\n", | ||
216 | (unsigned long)(size >> 20), | ||
217 | (unsigned long)(base >> 20)); | ||
218 | } | ||
219 | fw_dump.reserve_dump_area_start = base; | ||
220 | fw_dump.reserve_dump_area_size = size; | ||
221 | return 1; | ||
222 | } | ||
223 | |||
224 | /* Look for fadump= cmdline option. */ | ||
225 | static int __init early_fadump_param(char *p) | ||
226 | { | ||
227 | if (!p) | ||
228 | return 1; | ||
229 | |||
230 | if (strncmp(p, "on", 2) == 0) | ||
231 | fw_dump.fadump_enabled = 1; | ||
232 | else if (strncmp(p, "off", 3) == 0) | ||
233 | fw_dump.fadump_enabled = 0; | ||
234 | |||
235 | return 0; | ||
236 | } | ||
237 | early_param("fadump", early_fadump_param); | ||
238 | |||
239 | /* Look for fadump_reserve_mem= cmdline option */ | ||
240 | static int __init early_fadump_reserve_mem(char *p) | ||
241 | { | ||
242 | if (p) | ||
243 | fw_dump.reserve_bootvar = memparse(p, &p); | ||
244 | return 0; | ||
245 | } | ||
246 | early_param("fadump_reserve_mem", early_fadump_reserve_mem); | ||
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index abe405dab34d..70222b35cfc5 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <asm/phyp_dump.h> | 55 | #include <asm/phyp_dump.h> |
56 | #include <asm/kexec.h> | 56 | #include <asm/kexec.h> |
57 | #include <asm/opal.h> | 57 | #include <asm/opal.h> |
58 | #include <asm/fadump.h> | ||
58 | 59 | ||
59 | #include <mm/mmu_decl.h> | 60 | #include <mm/mmu_decl.h> |
60 | 61 | ||
@@ -719,6 +720,11 @@ void __init early_init_devtree(void *params) | |||
719 | of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); | 720 | of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); |
720 | #endif | 721 | #endif |
721 | 722 | ||
723 | #ifdef CONFIG_FA_DUMP | ||
724 | /* scan tree to see if dump is active during last boot */ | ||
725 | of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); | ||
726 | #endif | ||
727 | |||
722 | /* Pre-initialize the cmd_line with the content of boot_command_line, | 728 | /* Pre-initialize the cmd_line with the content of boot_command_line, |
723 | * which will be empty except when the content of the variable has | 729 | * which will be empty except when the content of the variable has |
724 | * been overridden by a bootloading mechanism. This happens typically | 730 | * been overridden by a bootloading mechanism. This happens typically |
@@ -750,7 +756,14 @@ void __init early_init_devtree(void *params) | |||
750 | if (PHYSICAL_START > MEMORY_START) | 756 | if (PHYSICAL_START > MEMORY_START) |
751 | memblock_reserve(MEMORY_START, 0x8000); | 757 | memblock_reserve(MEMORY_START, 0x8000); |
752 | reserve_kdump_trampoline(); | 758 | reserve_kdump_trampoline(); |
753 | reserve_crashkernel(); | 759 | #ifdef CONFIG_FA_DUMP |
760 | /* | ||
761 | * If we fail to reserve memory for firmware-assisted dump then | ||
762 | * fallback to kexec based kdump. | ||
763 | */ | ||
764 | if (fadump_reserve_mem() == 0) | ||
765 | #endif | ||
766 | reserve_crashkernel(); | ||
754 | early_reserve_mem(); | 767 | early_reserve_mem(); |
755 | phyp_dump_reserve_mem(); | 768 | phyp_dump_reserve_mem(); |
756 | 769 | ||