aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Young <dyoung@redhat.com>2015-09-09 18:38:55 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 16:29:01 -0400
commit2965faa5e03d1e71e9ff9aa143fff39e0a77543a (patch)
tree78b12008d7078a9cd40e157d5b18b795b14d5d9c
parenta43cac0d9dc2073ff2245a171429ddbe1accece7 (diff)
kexec: split kexec_load syscall from kexec core code
There are two kexec load syscalls, kexec_load and kexec_file_load. kexec_file_load has already been split out into kernel/kexec_file.c. In this patch I split the kexec_load syscall code out into kernel/kexec.c, and add a new kconfig option, KEXEC_CORE, so we can disable kexec_load and use kexec_file_load only, or vice versa. The original requirement is from Ted Ts'o: he wants the kexec kernel signature to be checked when CONFIG_KEXEC_VERIFY_SIG is enabled, but kexec-tools uses the kexec_load syscall, which can bypass the check. Vivek Goyal proposed creating a common kconfig option so that the user can compile in only one syscall for loading the kexec kernel. KEXEC/KEXEC_FILE select KEXEC_CORE so that old config files still work. Because there is general code that needs CONFIG_KEXEC_CORE, I updated all the architecture Kconfig files to use the new option KEXEC_CORE, and let KEXEC select KEXEC_CORE in the arch Kconfig. Also updated the general kernel code that is not specific to the kexec_load syscall to test CONFIG_KEXEC_CORE instead of CONFIG_KEXEC. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Dave Young <dyoung@redhat.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Petr Tesarik <ptesarik@suse.cz> Cc: Theodore Ts'o <tytso@mit.edu> Cc: Josh Boyer <jwboyer@fedoraproject.org> Cc: David Howells <dhowells@redhat.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/ia64/Kconfig1
-rw-r--r--arch/m68k/Kconfig1
-rw-r--r--arch/mips/Kconfig1
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/sh/Kconfig1
-rw-r--r--arch/tile/Kconfig1
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/boot/header.S2
-rw-r--r--arch/x86/include/asm/kdebug.h2
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/kvmclock.c4
-rw-r--r--arch/x86/kernel/reboot.c4
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S2
-rw-r--r--arch/x86/kvm/vmx.c8
-rw-r--r--arch/x86/platform/efi/efi.c4
-rw-r--r--arch/x86/platform/uv/uv_nmi.c6
-rw-r--r--drivers/firmware/efi/Kconfig2
-rw-r--r--drivers/pci/pci-driver.c2
-rw-r--r--include/linux/kexec.h6
-rw-r--r--init/initramfs.c4
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/kexec.c1495
-rw-r--r--kernel/kexec_core.c1511
-rw-r--r--kernel/ksysfs.c6
-rw-r--r--kernel/printk/printk.c2
-rw-r--r--kernel/reboot.c2
-rw-r--r--kernel/sysctl.c2
32 files changed, 1560 insertions, 1527 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 8f3564930580..4e949e58b192 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -2,6 +2,9 @@
2# General architecture dependent options 2# General architecture dependent options
3# 3#
4 4
5config KEXEC_CORE
6 bool
7
5config OPROFILE 8config OPROFILE
6 tristate "OProfile system profiling" 9 tristate "OProfile system profiling"
7 depends on PROFILING 10 depends on PROFILING
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0d1b717e1eca..72ad724c67ae 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2020,6 +2020,7 @@ config KEXEC
2020 bool "Kexec system call (EXPERIMENTAL)" 2020 bool "Kexec system call (EXPERIMENTAL)"
2021 depends on (!SMP || PM_SLEEP_SMP) 2021 depends on (!SMP || PM_SLEEP_SMP)
2022 depends on !CPU_V7M 2022 depends on !CPU_V7M
2023 select KEXEC_CORE
2023 help 2024 help
2024 kexec is a system call that implements the ability to shutdown your 2025 kexec is a system call that implements the ability to shutdown your
2025 current kernel, and to start another kernel. It is like a reboot 2026 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 42a91a7aa2b0..eb0249e37981 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -518,6 +518,7 @@ source "drivers/sn/Kconfig"
518config KEXEC 518config KEXEC
519 bool "kexec system call" 519 bool "kexec system call"
520 depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) 520 depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
521 select KEXEC_CORE
521 help 522 help
522 kexec is a system call that implements the ability to shutdown your 523 kexec is a system call that implements the ability to shutdown your
523 current kernel, and to start another kernel. It is like a reboot 524 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 2dd8f63bfbbb..498b567f007b 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -95,6 +95,7 @@ config MMU_SUN3
95config KEXEC 95config KEXEC
96 bool "kexec system call" 96 bool "kexec system call"
97 depends on M68KCLASSIC 97 depends on M68KCLASSIC
98 select KEXEC_CORE
98 help 99 help
99 kexec is a system call that implements the ability to shutdown your 100 kexec is a system call that implements the ability to shutdown your
100 current kernel, and to start another kernel. It is like a reboot 101 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 752acca8de1f..e3aa5b0b4ef1 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2597,6 +2597,7 @@ source "kernel/Kconfig.preempt"
2597 2597
2598config KEXEC 2598config KEXEC
2599 bool "Kexec system call" 2599 bool "Kexec system call"
2600 select KEXEC_CORE
2600 help 2601 help
2601 kexec is a system call that implements the ability to shutdown your 2602 kexec is a system call that implements the ability to shutdown your
2602 current kernel, and to start another kernel. It is like a reboot 2603 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b447918b9e2c..9a7057ec2154 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -420,6 +420,7 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
420config KEXEC 420config KEXEC
421 bool "kexec system call" 421 bool "kexec system call"
422 depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) 422 depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
423 select KEXEC_CORE
423 help 424 help
424 kexec is a system call that implements the ability to shutdown your 425 kexec is a system call that implements the ability to shutdown your
425 current kernel, and to start another kernel. It is like a reboot 426 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 4827870f7a6d..1d57000b1b24 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -48,6 +48,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
48 48
49config KEXEC 49config KEXEC
50 def_bool y 50 def_bool y
51 select KEXEC_CORE
51 52
52config AUDIT_ARCH 53config AUDIT_ARCH
53 def_bool y 54 def_bool y
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 50057fed819d..d514df7e04dd 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -602,6 +602,7 @@ source kernel/Kconfig.hz
602config KEXEC 602config KEXEC
603 bool "kexec system call (EXPERIMENTAL)" 603 bool "kexec system call (EXPERIMENTAL)"
604 depends on SUPERH32 && MMU 604 depends on SUPERH32 && MMU
605 select KEXEC_CORE
605 help 606 help
606 kexec is a system call that implements the ability to shutdown your 607 kexec is a system call that implements the ability to shutdown your
607 current kernel, and to start another kernel. It is like a reboot 608 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 2ba12d761723..106c21bd7f44 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -205,6 +205,7 @@ source "kernel/Kconfig.hz"
205 205
206config KEXEC 206config KEXEC
207 bool "kexec system call" 207 bool "kexec system call"
208 select KEXEC_CORE
208 ---help--- 209 ---help---
209 kexec is a system call that implements the ability to shutdown your 210 kexec is a system call that implements the ability to shutdown your
210 current kernel, and to start another kernel. It is like a reboot 211 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc0d73eac047..7aef2d52daa0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1754,6 +1754,7 @@ source kernel/Kconfig.hz
1754 1754
1755config KEXEC 1755config KEXEC
1756 bool "kexec system call" 1756 bool "kexec system call"
1757 select KEXEC_CORE
1757 ---help--- 1758 ---help---
1758 kexec is a system call that implements the ability to shutdown your 1759 kexec is a system call that implements the ability to shutdown your
1759 current kernel, and to start another kernel. It is like a reboot 1760 current kernel, and to start another kernel. It is like a reboot
@@ -1770,8 +1771,8 @@ config KEXEC
1770 1771
1771config KEXEC_FILE 1772config KEXEC_FILE
1772 bool "kexec file based system call" 1773 bool "kexec file based system call"
1774 select KEXEC_CORE
1773 select BUILD_BIN2C 1775 select BUILD_BIN2C
1774 depends on KEXEC
1775 depends on X86_64 1776 depends on X86_64
1776 depends on CRYPTO=y 1777 depends on CRYPTO=y
1777 depends on CRYPTO_SHA256=y 1778 depends on CRYPTO_SHA256=y
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 16ef02596db2..2d6b309c8e9a 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -414,7 +414,7 @@ xloadflags:
414# define XLF23 0 414# define XLF23 0
415#endif 415#endif
416 416
417#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC) 417#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC_CORE)
418# define XLF4 XLF_EFI_KEXEC 418# define XLF4 XLF_EFI_KEXEC
419#else 419#else
420# define XLF4 0 420# define XLF4 0
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 32ce71375b21..b130d59406fb 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -29,7 +29,7 @@ extern void show_trace(struct task_struct *t, struct pt_regs *regs,
29extern void __show_regs(struct pt_regs *regs, int all); 29extern void __show_regs(struct pt_regs *regs, int all);
30extern unsigned long oops_begin(void); 30extern unsigned long oops_begin(void);
31extern void oops_end(unsigned long, struct pt_regs *, int signr); 31extern void oops_end(unsigned long, struct pt_regs *, int signr);
32#ifdef CONFIG_KEXEC 32#ifdef CONFIG_KEXEC_CORE
33extern int in_crash_kexec; 33extern int in_crash_kexec;
34#else 34#else
35/* no crash dump is ever in progress if no crash kernel can be kexec'd */ 35/* no crash dump is ever in progress if no crash kernel can be kexec'd */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9ffdf25e5b86..b1b78ffe01d0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -71,8 +71,8 @@ obj-$(CONFIG_LIVEPATCH) += livepatch.o
71obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o 71obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
72obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o 72obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
73obj-$(CONFIG_X86_TSC) += trace_clock.o 73obj-$(CONFIG_X86_TSC) += trace_clock.o
74obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 74obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o
75obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 75obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o
76obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o 76obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
77obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 77obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
78obj-y += kprobes/ 78obj-y += kprobes/
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 49487b488061..2c7aafa70702 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -200,7 +200,7 @@ static void kvm_setup_secondary_clock(void)
200 * kind of shutdown from our side, we unregister the clock by writting anything 200 * kind of shutdown from our side, we unregister the clock by writting anything
201 * that does not have the 'enable' bit set in the msr 201 * that does not have the 'enable' bit set in the msr
202 */ 202 */
203#ifdef CONFIG_KEXEC 203#ifdef CONFIG_KEXEC_CORE
204static void kvm_crash_shutdown(struct pt_regs *regs) 204static void kvm_crash_shutdown(struct pt_regs *regs)
205{ 205{
206 native_write_msr(msr_kvm_system_time, 0, 0); 206 native_write_msr(msr_kvm_system_time, 0, 0);
@@ -259,7 +259,7 @@ void __init kvmclock_init(void)
259 x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; 259 x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
260 x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; 260 x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
261 machine_ops.shutdown = kvm_shutdown; 261 machine_ops.shutdown = kvm_shutdown;
262#ifdef CONFIG_KEXEC 262#ifdef CONFIG_KEXEC_CORE
263 machine_ops.crash_shutdown = kvm_crash_shutdown; 263 machine_ops.crash_shutdown = kvm_crash_shutdown;
264#endif 264#endif
265 kvm_get_preset_lpj(); 265 kvm_get_preset_lpj();
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 86db4bcd7ce5..02693dd9a079 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -673,7 +673,7 @@ struct machine_ops machine_ops = {
673 .emergency_restart = native_machine_emergency_restart, 673 .emergency_restart = native_machine_emergency_restart,
674 .restart = native_machine_restart, 674 .restart = native_machine_restart,
675 .halt = native_machine_halt, 675 .halt = native_machine_halt,
676#ifdef CONFIG_KEXEC 676#ifdef CONFIG_KEXEC_CORE
677 .crash_shutdown = native_machine_crash_shutdown, 677 .crash_shutdown = native_machine_crash_shutdown,
678#endif 678#endif
679}; 679};
@@ -703,7 +703,7 @@ void machine_halt(void)
703 machine_ops.halt(); 703 machine_ops.halt();
704} 704}
705 705
706#ifdef CONFIG_KEXEC 706#ifdef CONFIG_KEXEC_CORE
707void machine_crash_shutdown(struct pt_regs *regs) 707void machine_crash_shutdown(struct pt_regs *regs)
708{ 708{
709 machine_ops.crash_shutdown(regs); 709 machine_ops.crash_shutdown(regs);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index baadbf90a7c5..fdb7f2a2d328 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -478,7 +478,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)
478 * --------- Crashkernel reservation ------------------------------ 478 * --------- Crashkernel reservation ------------------------------
479 */ 479 */
480 480
481#ifdef CONFIG_KEXEC 481#ifdef CONFIG_KEXEC_CORE
482 482
483/* 483/*
484 * Keep the crash kernel below this limit. On 32 bits earlier kernels 484 * Keep the crash kernel below this limit. On 32 bits earlier kernels
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 00bf300fd846..74e4bf11f562 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -364,7 +364,7 @@ INIT_PER_CPU(irq_stack_union);
364 364
365#endif /* CONFIG_X86_32 */ 365#endif /* CONFIG_X86_32 */
366 366
367#ifdef CONFIG_KEXEC 367#ifdef CONFIG_KEXEC_CORE
368#include <asm/kexec.h> 368#include <asm/kexec.h>
369 369
370. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, 370. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 148ea2016022..d01986832afc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1264,7 +1264,7 @@ static void vmcs_load(struct vmcs *vmcs)
1264 vmcs, phys_addr); 1264 vmcs, phys_addr);
1265} 1265}
1266 1266
1267#ifdef CONFIG_KEXEC 1267#ifdef CONFIG_KEXEC_CORE
1268/* 1268/*
1269 * This bitmap is used to indicate whether the vmclear 1269 * This bitmap is used to indicate whether the vmclear
1270 * operation is enabled on all cpus. All disabled by 1270 * operation is enabled on all cpus. All disabled by
@@ -1302,7 +1302,7 @@ static void crash_vmclear_local_loaded_vmcss(void)
1302#else 1302#else
1303static inline void crash_enable_local_vmclear(int cpu) { } 1303static inline void crash_enable_local_vmclear(int cpu) { }
1304static inline void crash_disable_local_vmclear(int cpu) { } 1304static inline void crash_disable_local_vmclear(int cpu) { }
1305#endif /* CONFIG_KEXEC */ 1305#endif /* CONFIG_KEXEC_CORE */
1306 1306
1307static void __loaded_vmcs_clear(void *arg) 1307static void __loaded_vmcs_clear(void *arg)
1308{ 1308{
@@ -10411,7 +10411,7 @@ static int __init vmx_init(void)
10411 if (r) 10411 if (r)
10412 return r; 10412 return r;
10413 10413
10414#ifdef CONFIG_KEXEC 10414#ifdef CONFIG_KEXEC_CORE
10415 rcu_assign_pointer(crash_vmclear_loaded_vmcss, 10415 rcu_assign_pointer(crash_vmclear_loaded_vmcss,
10416 crash_vmclear_local_loaded_vmcss); 10416 crash_vmclear_local_loaded_vmcss);
10417#endif 10417#endif
@@ -10421,7 +10421,7 @@ static int __init vmx_init(void)
10421 10421
10422static void __exit vmx_exit(void) 10422static void __exit vmx_exit(void)
10423{ 10423{
10424#ifdef CONFIG_KEXEC 10424#ifdef CONFIG_KEXEC_CORE
10425 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); 10425 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
10426 synchronize_rcu(); 10426 synchronize_rcu();
10427#endif 10427#endif
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index e4308fe6afe8..1db84c0758b7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -650,7 +650,7 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md)
650 650
651static void __init save_runtime_map(void) 651static void __init save_runtime_map(void)
652{ 652{
653#ifdef CONFIG_KEXEC 653#ifdef CONFIG_KEXEC_CORE
654 efi_memory_desc_t *md; 654 efi_memory_desc_t *md;
655 void *tmp, *p, *q = NULL; 655 void *tmp, *p, *q = NULL;
656 int count = 0; 656 int count = 0;
@@ -748,7 +748,7 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
748 748
749static void __init kexec_enter_virtual_mode(void) 749static void __init kexec_enter_virtual_mode(void)
750{ 750{
751#ifdef CONFIG_KEXEC 751#ifdef CONFIG_KEXEC_CORE
752 efi_memory_desc_t *md; 752 efi_memory_desc_t *md;
753 void *p; 753 void *p;
754 754
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 020c101c255f..5c9f63fa6abf 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -492,7 +492,7 @@ static void uv_nmi_touch_watchdogs(void)
492 touch_nmi_watchdog(); 492 touch_nmi_watchdog();
493} 493}
494 494
495#if defined(CONFIG_KEXEC) 495#if defined(CONFIG_KEXEC_CORE)
496static atomic_t uv_nmi_kexec_failed; 496static atomic_t uv_nmi_kexec_failed;
497static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) 497static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
498{ 498{
@@ -519,13 +519,13 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
519 uv_nmi_sync_exit(0); 519 uv_nmi_sync_exit(0);
520} 520}
521 521
522#else /* !CONFIG_KEXEC */ 522#else /* !CONFIG_KEXEC_CORE */
523static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) 523static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
524{ 524{
525 if (master) 525 if (master)
526 pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n"); 526 pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
527} 527}
528#endif /* !CONFIG_KEXEC */ 528#endif /* !CONFIG_KEXEC_CORE */
529 529
530#ifdef CONFIG_KGDB 530#ifdef CONFIG_KGDB
531#ifdef CONFIG_KGDB_KDB 531#ifdef CONFIG_KGDB_KDB
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 54071c148340..84533e02fbf8 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -43,7 +43,7 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE
43 43
44config EFI_RUNTIME_MAP 44config EFI_RUNTIME_MAP
45 bool "Export efi runtime maps to sysfs" 45 bool "Export efi runtime maps to sysfs"
46 depends on X86 && EFI && KEXEC 46 depends on X86 && EFI && KEXEC_CORE
47 default y 47 default y
48 help 48 help
49 Export efi runtime memory maps to /sys/firmware/efi/runtime-map. 49 Export efi runtime memory maps to /sys/firmware/efi/runtime-map.
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 52a880ca1768..dd652f2ae03d 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -467,7 +467,7 @@ static void pci_device_shutdown(struct device *dev)
467 pci_msi_shutdown(pci_dev); 467 pci_msi_shutdown(pci_dev);
468 pci_msix_shutdown(pci_dev); 468 pci_msix_shutdown(pci_dev);
469 469
470#ifdef CONFIG_KEXEC 470#ifdef CONFIG_KEXEC_CORE
471 /* 471 /*
472 * If this is a kexec reboot, turn off Bus Master bit on the 472 * If this is a kexec reboot, turn off Bus Master bit on the
473 * device to tell it to not continue to do DMA. Don't touch 473 * device to tell it to not continue to do DMA. Don't touch
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ab150ade0d18..d140b1e9faa7 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -16,7 +16,7 @@
16 16
17#include <uapi/linux/kexec.h> 17#include <uapi/linux/kexec.h>
18 18
19#ifdef CONFIG_KEXEC 19#ifdef CONFIG_KEXEC_CORE
20#include <linux/list.h> 20#include <linux/list.h>
21#include <linux/linkage.h> 21#include <linux/linkage.h>
22#include <linux/compat.h> 22#include <linux/compat.h>
@@ -329,13 +329,13 @@ int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr,
329int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, 329int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
330 unsigned int relsec); 330 unsigned int relsec);
331 331
332#else /* !CONFIG_KEXEC */ 332#else /* !CONFIG_KEXEC_CORE */
333struct pt_regs; 333struct pt_regs;
334struct task_struct; 334struct task_struct;
335static inline void crash_kexec(struct pt_regs *regs) { } 335static inline void crash_kexec(struct pt_regs *regs) { }
336static inline int kexec_should_crash(struct task_struct *p) { return 0; } 336static inline int kexec_should_crash(struct task_struct *p) { return 0; }
337#define kexec_in_progress false 337#define kexec_in_progress false
338#endif /* CONFIG_KEXEC */ 338#endif /* CONFIG_KEXEC_CORE */
339 339
340#endif /* !defined(__ASSEBMLY__) */ 340#endif /* !defined(__ASSEBMLY__) */
341 341
diff --git a/init/initramfs.c b/init/initramfs.c
index ad1bd7787bbb..b32ad7d97ac9 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -526,14 +526,14 @@ extern unsigned long __initramfs_size;
526 526
527static void __init free_initrd(void) 527static void __init free_initrd(void)
528{ 528{
529#ifdef CONFIG_KEXEC 529#ifdef CONFIG_KEXEC_CORE
530 unsigned long crashk_start = (unsigned long)__va(crashk_res.start); 530 unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
531 unsigned long crashk_end = (unsigned long)__va(crashk_res.end); 531 unsigned long crashk_end = (unsigned long)__va(crashk_res.end);
532#endif 532#endif
533 if (do_retain_initrd) 533 if (do_retain_initrd)
534 goto skip; 534 goto skip;
535 535
536#ifdef CONFIG_KEXEC 536#ifdef CONFIG_KEXEC_CORE
537 /* 537 /*
538 * If the initrd region is overlapped with crashkernel reserved region, 538 * If the initrd region is overlapped with crashkernel reserved region,
539 * free only memory that is not part of crashkernel region. 539 * free only memory that is not part of crashkernel region.
diff --git a/kernel/Makefile b/kernel/Makefile
index 1b4890af5a65..d4988410b410 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_MODULES) += module.o
49obj-$(CONFIG_MODULE_SIG) += module_signing.o 49obj-$(CONFIG_MODULE_SIG) += module_signing.o
50obj-$(CONFIG_KALLSYMS) += kallsyms.o 50obj-$(CONFIG_KALLSYMS) += kallsyms.o
51obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o 51obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
52obj-$(CONFIG_KEXEC_CORE) += kexec_core.o
52obj-$(CONFIG_KEXEC) += kexec.o 53obj-$(CONFIG_KEXEC) += kexec.o
53obj-$(CONFIG_KEXEC_FILE) += kexec_file.o 54obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
54obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o 55obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e8183895691c..f548f69c4299 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9094,7 +9094,7 @@ static void perf_event_init_cpu(int cpu)
9094 mutex_unlock(&swhash->hlist_mutex); 9094 mutex_unlock(&swhash->hlist_mutex);
9095} 9095}
9096 9096
9097#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC 9097#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
9098static void __perf_event_exit_context(void *__info) 9098static void __perf_event_exit_context(void *__info)
9099{ 9099{
9100 struct remove_event re = { .detach_group = true }; 9100 struct remove_event re = { .detach_group = true };
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 2d73ecfa5505..4c5edc357923 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1,148 +1,23 @@
1/* 1/*
2 * kexec.c - kexec system call 2 * kexec.c - kexec_load system call
3 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> 3 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
4 * 4 *
5 * This source code is licensed under the GNU General Public License, 5 * This source code is licensed under the GNU General Public License,
6 * Version 2. See the file COPYING for more details. 6 * Version 2. See the file COPYING for more details.
7 */ 7 */
8 8
9#define pr_fmt(fmt) "kexec: " fmt
10
11#include <linux/capability.h> 9#include <linux/capability.h>
12#include <linux/mm.h> 10#include <linux/mm.h>
13#include <linux/file.h> 11#include <linux/file.h>
14#include <linux/slab.h>
15#include <linux/fs.h>
16#include <linux/kexec.h> 12#include <linux/kexec.h>
17#include <linux/mutex.h> 13#include <linux/mutex.h>
18#include <linux/list.h> 14#include <linux/list.h>
19#include <linux/highmem.h>
20#include <linux/syscalls.h> 15#include <linux/syscalls.h>
21#include <linux/reboot.h>
22#include <linux/ioport.h>
23#include <linux/hardirq.h>
24#include <linux/elf.h>
25#include <linux/elfcore.h>
26#include <linux/utsname.h>
27#include <linux/numa.h>
28#include <linux/suspend.h>
29#include <linux/device.h>
30#include <linux/freezer.h>
31#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
32#include <linux/pm.h> 17#include <linux/slab.h>
33#include <linux/cpu.h>
34#include <linux/console.h>
35#include <linux/swap.h>
36#include <linux/syscore_ops.h>
37#include <linux/compiler.h>
38#include <linux/hugetlb.h>
39
40#include <asm/page.h>
41#include <asm/uaccess.h>
42#include <asm/io.h>
43#include <asm/sections.h>
44 18
45#include <crypto/hash.h>
46#include <crypto/sha.h>
47#include "kexec_internal.h" 19#include "kexec_internal.h"
48 20
49DEFINE_MUTEX(kexec_mutex);
50
51/* Per cpu memory for storing cpu states in case of system crash. */
52note_buf_t __percpu *crash_notes;
53
54/* vmcoreinfo stuff */
55static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
56u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
57size_t vmcoreinfo_size;
58size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
59
60/* Flag to indicate we are going to kexec a new kernel */
61bool kexec_in_progress = false;
62
63
64/* Location of the reserved area for the crash kernel */
65struct resource crashk_res = {
66 .name = "Crash kernel",
67 .start = 0,
68 .end = 0,
69 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
70};
71struct resource crashk_low_res = {
72 .name = "Crash kernel",
73 .start = 0,
74 .end = 0,
75 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
76};
77
78int kexec_should_crash(struct task_struct *p)
79{
80 /*
81 * If crash_kexec_post_notifiers is enabled, don't run
82 * crash_kexec() here yet, which must be run after panic
83 * notifiers in panic().
84 */
85 if (crash_kexec_post_notifiers)
86 return 0;
87 /*
88 * There are 4 panic() calls in do_exit() path, each of which
89 * corresponds to each of these 4 conditions.
90 */
91 if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
92 return 1;
93 return 0;
94}
95
96/*
97 * When kexec transitions to the new kernel there is a one-to-one
98 * mapping between physical and virtual addresses. On processors
99 * where you can disable the MMU this is trivial, and easy. For
100 * others it is still a simple predictable page table to setup.
101 *
102 * In that environment kexec copies the new kernel to its final
103 * resting place. This means I can only support memory whose
104 * physical address can fit in an unsigned long. In particular
105 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
106 * If the assembly stub has more restrictive requirements
107 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
108 * defined more restrictively in <asm/kexec.h>.
109 *
110 * The code for the transition from the current kernel to the
111 * the new kernel is placed in the control_code_buffer, whose size
112 * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
113 * page of memory is necessary, but some architectures require more.
114 * Because this memory must be identity mapped in the transition from
115 * virtual to physical addresses it must live in the range
116 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
117 * modifiable.
118 *
119 * The assembly stub in the control code buffer is passed a linked list
120 * of descriptor pages detailing the source pages of the new kernel,
121 * and the destination addresses of those source pages. As this data
122 * structure is not used in the context of the current OS, it must
123 * be self-contained.
124 *
125 * The code has been made to work with highmem pages and will use a
126 * destination page in its final resting place (if it happens
127 * to allocate it). The end product of this is that most of the
128 * physical address space, and most of RAM can be used.
129 *
130 * Future directions include:
131 * - allocating a page table with the control code buffer identity
132 * mapped, to simplify machine_kexec and make kexec_on_panic more
133 * reliable.
134 */
135
136/*
137 * KIMAGE_NO_DEST is an impossible destination address..., for
138 * allocating pages whose destination address we do not care about.
139 */
140#define KIMAGE_NO_DEST (-1UL)
141
142static struct page *kimage_alloc_page(struct kimage *image,
143 gfp_t gfp_mask,
144 unsigned long dest);
145
146static int copy_user_segment_list(struct kimage *image, 21static int copy_user_segment_list(struct kimage *image,
147 unsigned long nr_segments, 22 unsigned long nr_segments,
148 struct kexec_segment __user *segments) 23 struct kexec_segment __user *segments)
@@ -160,123 +35,6 @@ static int copy_user_segment_list(struct kimage *image,
160 return ret; 35 return ret;
161} 36}
162 37
163int sanity_check_segment_list(struct kimage *image)
164{
165 int result, i;
166 unsigned long nr_segments = image->nr_segments;
167
168 /*
169 * Verify we have good destination addresses. The caller is
170 * responsible for making certain we don't attempt to load
171 * the new image into invalid or reserved areas of RAM. This
172 * just verifies it is an address we can use.
173 *
174 * Since the kernel does everything in page size chunks ensure
175 * the destination addresses are page aligned. Too many
176 * special cases crop of when we don't do this. The most
177 * insidious is getting overlapping destination addresses
178 * simply because addresses are changed to page size
179 * granularity.
180 */
181 result = -EADDRNOTAVAIL;
182 for (i = 0; i < nr_segments; i++) {
183 unsigned long mstart, mend;
184
185 mstart = image->segment[i].mem;
186 mend = mstart + image->segment[i].memsz;
187 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
188 return result;
189 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
190 return result;
191 }
192
193 /* Verify our destination addresses do not overlap.
194 * If we alloed overlapping destination addresses
195 * through very weird things can happen with no
196 * easy explanation as one segment stops on another.
197 */
198 result = -EINVAL;
199 for (i = 0; i < nr_segments; i++) {
200 unsigned long mstart, mend;
201 unsigned long j;
202
203 mstart = image->segment[i].mem;
204 mend = mstart + image->segment[i].memsz;
205 for (j = 0; j < i; j++) {
206 unsigned long pstart, pend;
207 pstart = image->segment[j].mem;
208 pend = pstart + image->segment[j].memsz;
209 /* Do the segments overlap ? */
210 if ((mend > pstart) && (mstart < pend))
211 return result;
212 }
213 }
214
215 /* Ensure our buffer sizes are strictly less than
216 * our memory sizes. This should always be the case,
217 * and it is easier to check up front than to be surprised
218 * later on.
219 */
220 result = -EINVAL;
221 for (i = 0; i < nr_segments; i++) {
222 if (image->segment[i].bufsz > image->segment[i].memsz)
223 return result;
224 }
225
226 /*
227 * Verify we have good destination addresses. Normally
228 * the caller is responsible for making certain we don't
229 * attempt to load the new image into invalid or reserved
230 * areas of RAM. But crash kernels are preloaded into a
231 * reserved area of ram. We must ensure the addresses
232 * are in the reserved area otherwise preloading the
233 * kernel could corrupt things.
234 */
235
236 if (image->type == KEXEC_TYPE_CRASH) {
237 result = -EADDRNOTAVAIL;
238 for (i = 0; i < nr_segments; i++) {
239 unsigned long mstart, mend;
240
241 mstart = image->segment[i].mem;
242 mend = mstart + image->segment[i].memsz - 1;
243 /* Ensure we are within the crash kernel limits */
244 if ((mstart < crashk_res.start) ||
245 (mend > crashk_res.end))
246 return result;
247 }
248 }
249
250 return 0;
251}
252
253struct kimage *do_kimage_alloc_init(void)
254{
255 struct kimage *image;
256
257 /* Allocate a controlling structure */
258 image = kzalloc(sizeof(*image), GFP_KERNEL);
259 if (!image)
260 return NULL;
261
262 image->head = 0;
263 image->entry = &image->head;
264 image->last_entry = &image->head;
265 image->control_page = ~0; /* By default this does not apply */
266 image->type = KEXEC_TYPE_DEFAULT;
267
268 /* Initialize the list of control pages */
269 INIT_LIST_HEAD(&image->control_pages);
270
271 /* Initialize the list of destination pages */
272 INIT_LIST_HEAD(&image->dest_pages);
273
274 /* Initialize the list of unusable pages */
275 INIT_LIST_HEAD(&image->unusable_pages);
276
277 return image;
278}
279
280static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, 38static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
281 unsigned long nr_segments, 39 unsigned long nr_segments,
282 struct kexec_segment __user *segments, 40 struct kexec_segment __user *segments,
@@ -343,597 +101,6 @@ out_free_image:
343 return ret; 101 return ret;
344} 102}
345 103
346int kimage_is_destination_range(struct kimage *image,
347 unsigned long start,
348 unsigned long end)
349{
350 unsigned long i;
351
352 for (i = 0; i < image->nr_segments; i++) {
353 unsigned long mstart, mend;
354
355 mstart = image->segment[i].mem;
356 mend = mstart + image->segment[i].memsz;
357 if ((end > mstart) && (start < mend))
358 return 1;
359 }
360
361 return 0;
362}
363
364static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
365{
366 struct page *pages;
367
368 pages = alloc_pages(gfp_mask, order);
369 if (pages) {
370 unsigned int count, i;
371 pages->mapping = NULL;
372 set_page_private(pages, order);
373 count = 1 << order;
374 for (i = 0; i < count; i++)
375 SetPageReserved(pages + i);
376 }
377
378 return pages;
379}
380
381static void kimage_free_pages(struct page *page)
382{
383 unsigned int order, count, i;
384
385 order = page_private(page);
386 count = 1 << order;
387 for (i = 0; i < count; i++)
388 ClearPageReserved(page + i);
389 __free_pages(page, order);
390}
391
392void kimage_free_page_list(struct list_head *list)
393{
394 struct list_head *pos, *next;
395
396 list_for_each_safe(pos, next, list) {
397 struct page *page;
398
399 page = list_entry(pos, struct page, lru);
400 list_del(&page->lru);
401 kimage_free_pages(page);
402 }
403}
404
405static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
406 unsigned int order)
407{
408 /* Control pages are special, they are the intermediaries
409 * that are needed while we copy the rest of the pages
410 * to their final resting place. As such they must
411 * not conflict with either the destination addresses
412 * or memory the kernel is already using.
413 *
414 * The only case where we really need more than one of
415 * these are for architectures where we cannot disable
416 * the MMU and must instead generate an identity mapped
417 * page table for all of the memory.
418 *
419 * At worst this runs in O(N) of the image size.
420 */
421 struct list_head extra_pages;
422 struct page *pages;
423 unsigned int count;
424
425 count = 1 << order;
426 INIT_LIST_HEAD(&extra_pages);
427
428 /* Loop while I can allocate a page and the page allocated
429 * is a destination page.
430 */
431 do {
432 unsigned long pfn, epfn, addr, eaddr;
433
434 pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
435 if (!pages)
436 break;
437 pfn = page_to_pfn(pages);
438 epfn = pfn + count;
439 addr = pfn << PAGE_SHIFT;
440 eaddr = epfn << PAGE_SHIFT;
441 if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
442 kimage_is_destination_range(image, addr, eaddr)) {
443 list_add(&pages->lru, &extra_pages);
444 pages = NULL;
445 }
446 } while (!pages);
447
448 if (pages) {
449 /* Remember the allocated page... */
450 list_add(&pages->lru, &image->control_pages);
451
452 /* Because the page is already in it's destination
453 * location we will never allocate another page at
454 * that address. Therefore kimage_alloc_pages
455 * will not return it (again) and we don't need
456 * to give it an entry in image->segment[].
457 */
458 }
459 /* Deal with the destination pages I have inadvertently allocated.
460 *
461 * Ideally I would convert multi-page allocations into single
462 * page allocations, and add everything to image->dest_pages.
463 *
464 * For now it is simpler to just free the pages.
465 */
466 kimage_free_page_list(&extra_pages);
467
468 return pages;
469}
470
471static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
472 unsigned int order)
473{
474 /* Control pages are special, they are the intermediaries
475 * that are needed while we copy the rest of the pages
476 * to their final resting place. As such they must
477 * not conflict with either the destination addresses
478 * or memory the kernel is already using.
479 *
480 * Control pages are also the only pags we must allocate
481 * when loading a crash kernel. All of the other pages
482 * are specified by the segments and we just memcpy
483 * into them directly.
484 *
485 * The only case where we really need more than one of
486 * these are for architectures where we cannot disable
487 * the MMU and must instead generate an identity mapped
488 * page table for all of the memory.
489 *
490 * Given the low demand this implements a very simple
491 * allocator that finds the first hole of the appropriate
492 * size in the reserved memory region, and allocates all
493 * of the memory up to and including the hole.
494 */
495 unsigned long hole_start, hole_end, size;
496 struct page *pages;
497
498 pages = NULL;
499 size = (1 << order) << PAGE_SHIFT;
500 hole_start = (image->control_page + (size - 1)) & ~(size - 1);
501 hole_end = hole_start + size - 1;
502 while (hole_end <= crashk_res.end) {
503 unsigned long i;
504
505 if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
506 break;
507 /* See if I overlap any of the segments */
508 for (i = 0; i < image->nr_segments; i++) {
509 unsigned long mstart, mend;
510
511 mstart = image->segment[i].mem;
512 mend = mstart + image->segment[i].memsz - 1;
513 if ((hole_end >= mstart) && (hole_start <= mend)) {
514 /* Advance the hole to the end of the segment */
515 hole_start = (mend + (size - 1)) & ~(size - 1);
516 hole_end = hole_start + size - 1;
517 break;
518 }
519 }
520 /* If I don't overlap any segments I have found my hole! */
521 if (i == image->nr_segments) {
522 pages = pfn_to_page(hole_start >> PAGE_SHIFT);
523 break;
524 }
525 }
526 if (pages)
527 image->control_page = hole_end;
528
529 return pages;
530}
531
532
533struct page *kimage_alloc_control_pages(struct kimage *image,
534 unsigned int order)
535{
536 struct page *pages = NULL;
537
538 switch (image->type) {
539 case KEXEC_TYPE_DEFAULT:
540 pages = kimage_alloc_normal_control_pages(image, order);
541 break;
542 case KEXEC_TYPE_CRASH:
543 pages = kimage_alloc_crash_control_pages(image, order);
544 break;
545 }
546
547 return pages;
548}
549
550static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
551{
552 if (*image->entry != 0)
553 image->entry++;
554
555 if (image->entry == image->last_entry) {
556 kimage_entry_t *ind_page;
557 struct page *page;
558
559 page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
560 if (!page)
561 return -ENOMEM;
562
563 ind_page = page_address(page);
564 *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
565 image->entry = ind_page;
566 image->last_entry = ind_page +
567 ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
568 }
569 *image->entry = entry;
570 image->entry++;
571 *image->entry = 0;
572
573 return 0;
574}
575
576static int kimage_set_destination(struct kimage *image,
577 unsigned long destination)
578{
579 int result;
580
581 destination &= PAGE_MASK;
582 result = kimage_add_entry(image, destination | IND_DESTINATION);
583
584 return result;
585}
586
587
588static int kimage_add_page(struct kimage *image, unsigned long page)
589{
590 int result;
591
592 page &= PAGE_MASK;
593 result = kimage_add_entry(image, page | IND_SOURCE);
594
595 return result;
596}
597
598
599static void kimage_free_extra_pages(struct kimage *image)
600{
601 /* Walk through and free any extra destination pages I may have */
602 kimage_free_page_list(&image->dest_pages);
603
604 /* Walk through and free any unusable pages I have cached */
605 kimage_free_page_list(&image->unusable_pages);
606
607}
608void kimage_terminate(struct kimage *image)
609{
610 if (*image->entry != 0)
611 image->entry++;
612
613 *image->entry = IND_DONE;
614}
615
/*
 * for_each_kimage_entry - walk the entry list of a kexec image
 * @image: pointer to the image; the walk starts at its ->head slot
 * @ptr:   kimage_entry_t * cursor, points at the current slot
 * @entry: kimage_entry_t lvalue, receives the value of the current slot
 *
 * The walk stops at a zero entry or at one carrying IND_DONE.  After an
 * IND_INDIRECTION entry the cursor jumps to the table that entry points
 * to; otherwise it advances to the next slot.  The body must not modify
 * @entry, since the step expression reads it.
 *
 * Every parameter is parenthesised so that expression arguments such as
 * "&img" expand correctly.
 */
#define for_each_kimage_entry(image, ptr, entry) \
	for ((ptr) = &(image)->head; \
	     ((entry) = *(ptr)) && !((entry) & IND_DONE); \
	     (ptr) = ((entry) & IND_INDIRECTION) ? \
			phys_to_virt(((entry) & PAGE_MASK)) : (ptr) + 1)
620
621static void kimage_free_entry(kimage_entry_t entry)
622{
623 struct page *page;
624
625 page = pfn_to_page(entry >> PAGE_SHIFT);
626 kimage_free_pages(page);
627}
628
629void kimage_free(struct kimage *image)
630{
631 kimage_entry_t *ptr, entry;
632 kimage_entry_t ind = 0;
633
634 if (!image)
635 return;
636
637 kimage_free_extra_pages(image);
638 for_each_kimage_entry(image, ptr, entry) {
639 if (entry & IND_INDIRECTION) {
640 /* Free the previous indirection page */
641 if (ind & IND_INDIRECTION)
642 kimage_free_entry(ind);
643 /* Save this indirection page until we are
644 * done with it.
645 */
646 ind = entry;
647 } else if (entry & IND_SOURCE)
648 kimage_free_entry(entry);
649 }
650 /* Free the final indirection page */
651 if (ind & IND_INDIRECTION)
652 kimage_free_entry(ind);
653
654 /* Handle any machine specific cleanup */
655 machine_kexec_cleanup(image);
656
657 /* Free the kexec control pages... */
658 kimage_free_page_list(&image->control_pages);
659
660 /*
661 * Free up any temporary buffers allocated. This might hit if
662 * error occurred much later after buffer allocation.
663 */
664 if (image->file_mode)
665 kimage_file_post_load_cleanup(image);
666
667 kfree(image);
668}
669
670static kimage_entry_t *kimage_dst_used(struct kimage *image,
671 unsigned long page)
672{
673 kimage_entry_t *ptr, entry;
674 unsigned long destination = 0;
675
676 for_each_kimage_entry(image, ptr, entry) {
677 if (entry & IND_DESTINATION)
678 destination = entry & PAGE_MASK;
679 else if (entry & IND_SOURCE) {
680 if (page == destination)
681 return ptr;
682 destination += PAGE_SIZE;
683 }
684 }
685
686 return NULL;
687}
688
689static struct page *kimage_alloc_page(struct kimage *image,
690 gfp_t gfp_mask,
691 unsigned long destination)
692{
693 /*
694 * Here we implement safeguards to ensure that a source page
695 * is not copied to its destination page before the data on
696 * the destination page is no longer useful.
697 *
698 * To do this we maintain the invariant that a source page is
699 * either its own destination page, or it is not a
700 * destination page at all.
701 *
702 * That is slightly stronger than required, but the proof
703 * that no problems will not occur is trivial, and the
704 * implementation is simply to verify.
705 *
706 * When allocating all pages normally this algorithm will run
707 * in O(N) time, but in the worst case it will run in O(N^2)
708 * time. If the runtime is a problem the data structures can
709 * be fixed.
710 */
711 struct page *page;
712 unsigned long addr;
713
714 /*
715 * Walk through the list of destination pages, and see if I
716 * have a match.
717 */
718 list_for_each_entry(page, &image->dest_pages, lru) {
719 addr = page_to_pfn(page) << PAGE_SHIFT;
720 if (addr == destination) {
721 list_del(&page->lru);
722 return page;
723 }
724 }
725 page = NULL;
726 while (1) {
727 kimage_entry_t *old;
728
729 /* Allocate a page, if we run out of memory give up */
730 page = kimage_alloc_pages(gfp_mask, 0);
731 if (!page)
732 return NULL;
733 /* If the page cannot be used file it away */
734 if (page_to_pfn(page) >
735 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
736 list_add(&page->lru, &image->unusable_pages);
737 continue;
738 }
739 addr = page_to_pfn(page) << PAGE_SHIFT;
740
741 /* If it is the destination page we want use it */
742 if (addr == destination)
743 break;
744
745 /* If the page is not a destination page use it */
746 if (!kimage_is_destination_range(image, addr,
747 addr + PAGE_SIZE))
748 break;
749
750 /*
751 * I know that the page is someones destination page.
752 * See if there is already a source page for this
753 * destination page. And if so swap the source pages.
754 */
755 old = kimage_dst_used(image, addr);
756 if (old) {
757 /* If so move it */
758 unsigned long old_addr;
759 struct page *old_page;
760
761 old_addr = *old & PAGE_MASK;
762 old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
763 copy_highpage(page, old_page);
764 *old = addr | (*old & ~PAGE_MASK);
765
766 /* The old page I have found cannot be a
767 * destination page, so return it if it's
768 * gfp_flags honor the ones passed in.
769 */
770 if (!(gfp_mask & __GFP_HIGHMEM) &&
771 PageHighMem(old_page)) {
772 kimage_free_pages(old_page);
773 continue;
774 }
775 addr = old_addr;
776 page = old_page;
777 break;
778 } else {
779 /* Place the page on the destination list I
780 * will use it later.
781 */
782 list_add(&page->lru, &image->dest_pages);
783 }
784 }
785
786 return page;
787}
788
789static int kimage_load_normal_segment(struct kimage *image,
790 struct kexec_segment *segment)
791{
792 unsigned long maddr;
793 size_t ubytes, mbytes;
794 int result;
795 unsigned char __user *buf = NULL;
796 unsigned char *kbuf = NULL;
797
798 result = 0;
799 if (image->file_mode)
800 kbuf = segment->kbuf;
801 else
802 buf = segment->buf;
803 ubytes = segment->bufsz;
804 mbytes = segment->memsz;
805 maddr = segment->mem;
806
807 result = kimage_set_destination(image, maddr);
808 if (result < 0)
809 goto out;
810
811 while (mbytes) {
812 struct page *page;
813 char *ptr;
814 size_t uchunk, mchunk;
815
816 page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
817 if (!page) {
818 result = -ENOMEM;
819 goto out;
820 }
821 result = kimage_add_page(image, page_to_pfn(page)
822 << PAGE_SHIFT);
823 if (result < 0)
824 goto out;
825
826 ptr = kmap(page);
827 /* Start with a clear page */
828 clear_page(ptr);
829 ptr += maddr & ~PAGE_MASK;
830 mchunk = min_t(size_t, mbytes,
831 PAGE_SIZE - (maddr & ~PAGE_MASK));
832 uchunk = min(ubytes, mchunk);
833
834 /* For file based kexec, source pages are in kernel memory */
835 if (image->file_mode)
836 memcpy(ptr, kbuf, uchunk);
837 else
838 result = copy_from_user(ptr, buf, uchunk);
839 kunmap(page);
840 if (result) {
841 result = -EFAULT;
842 goto out;
843 }
844 ubytes -= uchunk;
845 maddr += mchunk;
846 if (image->file_mode)
847 kbuf += mchunk;
848 else
849 buf += mchunk;
850 mbytes -= mchunk;
851 }
852out:
853 return result;
854}
855
856static int kimage_load_crash_segment(struct kimage *image,
857 struct kexec_segment *segment)
858{
859 /* For crash dumps kernels we simply copy the data from
860 * user space to it's destination.
861 * We do things a page at a time for the sake of kmap.
862 */
863 unsigned long maddr;
864 size_t ubytes, mbytes;
865 int result;
866 unsigned char __user *buf = NULL;
867 unsigned char *kbuf = NULL;
868
869 result = 0;
870 if (image->file_mode)
871 kbuf = segment->kbuf;
872 else
873 buf = segment->buf;
874 ubytes = segment->bufsz;
875 mbytes = segment->memsz;
876 maddr = segment->mem;
877 while (mbytes) {
878 struct page *page;
879 char *ptr;
880 size_t uchunk, mchunk;
881
882 page = pfn_to_page(maddr >> PAGE_SHIFT);
883 if (!page) {
884 result = -ENOMEM;
885 goto out;
886 }
887 ptr = kmap(page);
888 ptr += maddr & ~PAGE_MASK;
889 mchunk = min_t(size_t, mbytes,
890 PAGE_SIZE - (maddr & ~PAGE_MASK));
891 uchunk = min(ubytes, mchunk);
892 if (mchunk > uchunk) {
893 /* Zero the trailing part of the page */
894 memset(ptr + uchunk, 0, mchunk - uchunk);
895 }
896
897 /* For file based kexec, source pages are in kernel memory */
898 if (image->file_mode)
899 memcpy(ptr, kbuf, uchunk);
900 else
901 result = copy_from_user(ptr, buf, uchunk);
902 kexec_flush_icache_page(page);
903 kunmap(page);
904 if (result) {
905 result = -EFAULT;
906 goto out;
907 }
908 ubytes -= uchunk;
909 maddr += mchunk;
910 if (image->file_mode)
911 kbuf += mchunk;
912 else
913 buf += mchunk;
914 mbytes -= mchunk;
915 }
916out:
917 return result;
918}
919
920int kimage_load_segment(struct kimage *image,
921 struct kexec_segment *segment)
922{
923 int result = -ENOMEM;
924
925 switch (image->type) {
926 case KEXEC_TYPE_DEFAULT:
927 result = kimage_load_normal_segment(image, segment);
928 break;
929 case KEXEC_TYPE_CRASH:
930 result = kimage_load_crash_segment(image, segment);
931 break;
932 }
933
934 return result;
935}
936
937/* 104/*
938 * Exec Kernel system call: for obvious reasons only root may call it. 105 * Exec Kernel system call: for obvious reasons only root may call it.
939 * 106 *
@@ -954,9 +121,6 @@ int kimage_load_segment(struct kimage *image,
954 * kexec does not sync, or unmount filesystems so if you need 121 * kexec does not sync, or unmount filesystems so if you need
955 * that to happen you need to do that yourself. 122 * that to happen you need to do that yourself.
956 */ 123 */
957struct kimage *kexec_image;
958struct kimage *kexec_crash_image;
959int kexec_load_disabled;
960 124
961SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, 125SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
962 struct kexec_segment __user *, segments, unsigned long, flags) 126 struct kexec_segment __user *, segments, unsigned long, flags)
@@ -1051,18 +215,6 @@ out:
1051 return result; 215 return result;
1052} 216}
1053 217
1054/*
1055 * Add and remove page tables for crashkernel memory
1056 *
1057 * Provide an empty default implementation here -- architecture
1058 * code may override this
1059 */
1060void __weak crash_map_reserved_pages(void)
1061{}
1062
1063void __weak crash_unmap_reserved_pages(void)
1064{}
1065
1066#ifdef CONFIG_COMPAT 218#ifdef CONFIG_COMPAT
1067COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, 219COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
1068 compat_ulong_t, nr_segments, 220 compat_ulong_t, nr_segments,
@@ -1101,646 +253,3 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
1101 return sys_kexec_load(entry, nr_segments, ksegments, flags); 253 return sys_kexec_load(entry, nr_segments, ksegments, flags);
1102} 254}
1103#endif 255#endif
1104
1105void crash_kexec(struct pt_regs *regs)
1106{
1107 /* Take the kexec_mutex here to prevent sys_kexec_load
1108 * running on one cpu from replacing the crash kernel
1109 * we are using after a panic on a different cpu.
1110 *
1111 * If the crash kernel was not located in a fixed area
1112 * of memory the xchg(&kexec_crash_image) would be
1113 * sufficient. But since I reuse the memory...
1114 */
1115 if (mutex_trylock(&kexec_mutex)) {
1116 if (kexec_crash_image) {
1117 struct pt_regs fixed_regs;
1118
1119 crash_setup_regs(&fixed_regs, regs);
1120 crash_save_vmcoreinfo();
1121 machine_crash_shutdown(&fixed_regs);
1122 machine_kexec(kexec_crash_image);
1123 }
1124 mutex_unlock(&kexec_mutex);
1125 }
1126}
1127
1128size_t crash_get_memory_size(void)
1129{
1130 size_t size = 0;
1131 mutex_lock(&kexec_mutex);
1132 if (crashk_res.end != crashk_res.start)
1133 size = resource_size(&crashk_res);
1134 mutex_unlock(&kexec_mutex);
1135 return size;
1136}
1137
1138void __weak crash_free_reserved_phys_range(unsigned long begin,
1139 unsigned long end)
1140{
1141 unsigned long addr;
1142
1143 for (addr = begin; addr < end; addr += PAGE_SIZE)
1144 free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
1145}
1146
1147int crash_shrink_memory(unsigned long new_size)
1148{
1149 int ret = 0;
1150 unsigned long start, end;
1151 unsigned long old_size;
1152 struct resource *ram_res;
1153
1154 mutex_lock(&kexec_mutex);
1155
1156 if (kexec_crash_image) {
1157 ret = -ENOENT;
1158 goto unlock;
1159 }
1160 start = crashk_res.start;
1161 end = crashk_res.end;
1162 old_size = (end == 0) ? 0 : end - start + 1;
1163 if (new_size >= old_size) {
1164 ret = (new_size == old_size) ? 0 : -EINVAL;
1165 goto unlock;
1166 }
1167
1168 ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
1169 if (!ram_res) {
1170 ret = -ENOMEM;
1171 goto unlock;
1172 }
1173
1174 start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
1175 end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
1176
1177 crash_map_reserved_pages();
1178 crash_free_reserved_phys_range(end, crashk_res.end);
1179
1180 if ((start == end) && (crashk_res.parent != NULL))
1181 release_resource(&crashk_res);
1182
1183 ram_res->start = end;
1184 ram_res->end = crashk_res.end;
1185 ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
1186 ram_res->name = "System RAM";
1187
1188 crashk_res.end = end - 1;
1189
1190 insert_resource(&iomem_resource, ram_res);
1191 crash_unmap_reserved_pages();
1192
1193unlock:
1194 mutex_unlock(&kexec_mutex);
1195 return ret;
1196}
1197
1198static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
1199 size_t data_len)
1200{
1201 struct elf_note note;
1202
1203 note.n_namesz = strlen(name) + 1;
1204 note.n_descsz = data_len;
1205 note.n_type = type;
1206 memcpy(buf, &note, sizeof(note));
1207 buf += (sizeof(note) + 3)/4;
1208 memcpy(buf, name, note.n_namesz);
1209 buf += (note.n_namesz + 3)/4;
1210 memcpy(buf, data, note.n_descsz);
1211 buf += (note.n_descsz + 3)/4;
1212
1213 return buf;
1214}
1215
1216static void final_note(u32 *buf)
1217{
1218 struct elf_note note;
1219
1220 note.n_namesz = 0;
1221 note.n_descsz = 0;
1222 note.n_type = 0;
1223 memcpy(buf, &note, sizeof(note));
1224}
1225
1226void crash_save_cpu(struct pt_regs *regs, int cpu)
1227{
1228 struct elf_prstatus prstatus;
1229 u32 *buf;
1230
1231 if ((cpu < 0) || (cpu >= nr_cpu_ids))
1232 return;
1233
1234 /* Using ELF notes here is opportunistic.
1235 * I need a well defined structure format
1236 * for the data I pass, and I need tags
1237 * on the data to indicate what information I have
1238 * squirrelled away. ELF notes happen to provide
1239 * all of that, so there is no need to invent something new.
1240 */
1241 buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
1242 if (!buf)
1243 return;
1244 memset(&prstatus, 0, sizeof(prstatus));
1245 prstatus.pr_pid = current->pid;
1246 elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
1247 buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
1248 &prstatus, sizeof(prstatus));
1249 final_note(buf);
1250}
1251
1252static int __init crash_notes_memory_init(void)
1253{
1254 /* Allocate memory for saving cpu registers. */
1255 crash_notes = alloc_percpu(note_buf_t);
1256 if (!crash_notes) {
1257 pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
1258 return -ENOMEM;
1259 }
1260 return 0;
1261}
1262subsys_initcall(crash_notes_memory_init);
1263
1264
1265/*
1266 * parsing the "crashkernel" commandline
1267 *
1268 * this code is intended to be called from architecture specific code
1269 */
1270
1271
1272/*
1273 * This function parses command lines in the format
1274 *
1275 * crashkernel=ramsize-range:size[,...][@offset]
1276 *
1277 * The function returns 0 on success and -EINVAL on failure.
1278 */
1279static int __init parse_crashkernel_mem(char *cmdline,
1280 unsigned long long system_ram,
1281 unsigned long long *crash_size,
1282 unsigned long long *crash_base)
1283{
1284 char *cur = cmdline, *tmp;
1285
1286 /* for each entry of the comma-separated list */
1287 do {
1288 unsigned long long start, end = ULLONG_MAX, size;
1289
1290 /* get the start of the range */
1291 start = memparse(cur, &tmp);
1292 if (cur == tmp) {
1293 pr_warn("crashkernel: Memory value expected\n");
1294 return -EINVAL;
1295 }
1296 cur = tmp;
1297 if (*cur != '-') {
1298 pr_warn("crashkernel: '-' expected\n");
1299 return -EINVAL;
1300 }
1301 cur++;
1302
1303 /* if no ':' is here, than we read the end */
1304 if (*cur != ':') {
1305 end = memparse(cur, &tmp);
1306 if (cur == tmp) {
1307 pr_warn("crashkernel: Memory value expected\n");
1308 return -EINVAL;
1309 }
1310 cur = tmp;
1311 if (end <= start) {
1312 pr_warn("crashkernel: end <= start\n");
1313 return -EINVAL;
1314 }
1315 }
1316
1317 if (*cur != ':') {
1318 pr_warn("crashkernel: ':' expected\n");
1319 return -EINVAL;
1320 }
1321 cur++;
1322
1323 size = memparse(cur, &tmp);
1324 if (cur == tmp) {
1325 pr_warn("Memory value expected\n");
1326 return -EINVAL;
1327 }
1328 cur = tmp;
1329 if (size >= system_ram) {
1330 pr_warn("crashkernel: invalid size\n");
1331 return -EINVAL;
1332 }
1333
1334 /* match ? */
1335 if (system_ram >= start && system_ram < end) {
1336 *crash_size = size;
1337 break;
1338 }
1339 } while (*cur++ == ',');
1340
1341 if (*crash_size > 0) {
1342 while (*cur && *cur != ' ' && *cur != '@')
1343 cur++;
1344 if (*cur == '@') {
1345 cur++;
1346 *crash_base = memparse(cur, &tmp);
1347 if (cur == tmp) {
1348 pr_warn("Memory value expected after '@'\n");
1349 return -EINVAL;
1350 }
1351 }
1352 }
1353
1354 return 0;
1355}
1356
1357/*
1358 * That function parses "simple" (old) crashkernel command lines like
1359 *
1360 * crashkernel=size[@offset]
1361 *
1362 * It returns 0 on success and -EINVAL on failure.
1363 */
1364static int __init parse_crashkernel_simple(char *cmdline,
1365 unsigned long long *crash_size,
1366 unsigned long long *crash_base)
1367{
1368 char *cur = cmdline;
1369
1370 *crash_size = memparse(cmdline, &cur);
1371 if (cmdline == cur) {
1372 pr_warn("crashkernel: memory value expected\n");
1373 return -EINVAL;
1374 }
1375
1376 if (*cur == '@')
1377 *crash_base = memparse(cur+1, &cur);
1378 else if (*cur != ' ' && *cur != '\0') {
1379 pr_warn("crashkernel: unrecognized char\n");
1380 return -EINVAL;
1381 }
1382
1383 return 0;
1384}
1385
1386#define SUFFIX_HIGH 0
1387#define SUFFIX_LOW 1
1388#define SUFFIX_NULL 2
1389static __initdata char *suffix_tbl[] = {
1390 [SUFFIX_HIGH] = ",high",
1391 [SUFFIX_LOW] = ",low",
1392 [SUFFIX_NULL] = NULL,
1393};
1394
1395/*
1396 * That function parses "suffix" crashkernel command lines like
1397 *
1398 * crashkernel=size,[high|low]
1399 *
1400 * It returns 0 on success and -EINVAL on failure.
1401 */
1402static int __init parse_crashkernel_suffix(char *cmdline,
1403 unsigned long long *crash_size,
1404 const char *suffix)
1405{
1406 char *cur = cmdline;
1407
1408 *crash_size = memparse(cmdline, &cur);
1409 if (cmdline == cur) {
1410 pr_warn("crashkernel: memory value expected\n");
1411 return -EINVAL;
1412 }
1413
1414 /* check with suffix */
1415 if (strncmp(cur, suffix, strlen(suffix))) {
1416 pr_warn("crashkernel: unrecognized char\n");
1417 return -EINVAL;
1418 }
1419 cur += strlen(suffix);
1420 if (*cur != ' ' && *cur != '\0') {
1421 pr_warn("crashkernel: unrecognized char\n");
1422 return -EINVAL;
1423 }
1424
1425 return 0;
1426}
1427
1428static __init char *get_last_crashkernel(char *cmdline,
1429 const char *name,
1430 const char *suffix)
1431{
1432 char *p = cmdline, *ck_cmdline = NULL;
1433
1434 /* find crashkernel and use the last one if there are more */
1435 p = strstr(p, name);
1436 while (p) {
1437 char *end_p = strchr(p, ' ');
1438 char *q;
1439
1440 if (!end_p)
1441 end_p = p + strlen(p);
1442
1443 if (!suffix) {
1444 int i;
1445
1446 /* skip the one with any known suffix */
1447 for (i = 0; suffix_tbl[i]; i++) {
1448 q = end_p - strlen(suffix_tbl[i]);
1449 if (!strncmp(q, suffix_tbl[i],
1450 strlen(suffix_tbl[i])))
1451 goto next;
1452 }
1453 ck_cmdline = p;
1454 } else {
1455 q = end_p - strlen(suffix);
1456 if (!strncmp(q, suffix, strlen(suffix)))
1457 ck_cmdline = p;
1458 }
1459next:
1460 p = strstr(p+1, name);
1461 }
1462
1463 if (!ck_cmdline)
1464 return NULL;
1465
1466 return ck_cmdline;
1467}
1468
1469static int __init __parse_crashkernel(char *cmdline,
1470 unsigned long long system_ram,
1471 unsigned long long *crash_size,
1472 unsigned long long *crash_base,
1473 const char *name,
1474 const char *suffix)
1475{
1476 char *first_colon, *first_space;
1477 char *ck_cmdline;
1478
1479 BUG_ON(!crash_size || !crash_base);
1480 *crash_size = 0;
1481 *crash_base = 0;
1482
1483 ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
1484
1485 if (!ck_cmdline)
1486 return -EINVAL;
1487
1488 ck_cmdline += strlen(name);
1489
1490 if (suffix)
1491 return parse_crashkernel_suffix(ck_cmdline, crash_size,
1492 suffix);
1493 /*
1494 * if the commandline contains a ':', then that's the extended
1495 * syntax -- if not, it must be the classic syntax
1496 */
1497 first_colon = strchr(ck_cmdline, ':');
1498 first_space = strchr(ck_cmdline, ' ');
1499 if (first_colon && (!first_space || first_colon < first_space))
1500 return parse_crashkernel_mem(ck_cmdline, system_ram,
1501 crash_size, crash_base);
1502
1503 return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
1504}
1505
1506/*
1507 * That function is the entry point for command line parsing and should be
1508 * called from the arch-specific code.
1509 */
1510int __init parse_crashkernel(char *cmdline,
1511 unsigned long long system_ram,
1512 unsigned long long *crash_size,
1513 unsigned long long *crash_base)
1514{
1515 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1516 "crashkernel=", NULL);
1517}
1518
1519int __init parse_crashkernel_high(char *cmdline,
1520 unsigned long long system_ram,
1521 unsigned long long *crash_size,
1522 unsigned long long *crash_base)
1523{
1524 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1525 "crashkernel=", suffix_tbl[SUFFIX_HIGH]);
1526}
1527
1528int __init parse_crashkernel_low(char *cmdline,
1529 unsigned long long system_ram,
1530 unsigned long long *crash_size,
1531 unsigned long long *crash_base)
1532{
1533 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1534 "crashkernel=", suffix_tbl[SUFFIX_LOW]);
1535}
1536
1537static void update_vmcoreinfo_note(void)
1538{
1539 u32 *buf = vmcoreinfo_note;
1540
1541 if (!vmcoreinfo_size)
1542 return;
1543 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1544 vmcoreinfo_size);
1545 final_note(buf);
1546}
1547
/*
 * Append a "CRASHTIME=" line holding get_seconds() to the vmcoreinfo
 * data and then regenerate the vmcoreinfo ELF note so it includes it.
 */
void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
	update_vmcoreinfo_note();
}
1553
1554void vmcoreinfo_append_str(const char *fmt, ...)
1555{
1556 va_list args;
1557 char buf[0x50];
1558 size_t r;
1559
1560 va_start(args, fmt);
1561 r = vscnprintf(buf, sizeof(buf), fmt, args);
1562 va_end(args);
1563
1564 r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
1565
1566 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1567
1568 vmcoreinfo_size += r;
1569}
1570
/*
 * Empty default implementation of the architecture hook that is called
 * at the end of crash_save_vmcoreinfo_init().  It is declared __weak so
 * that architecture code may override it.
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}
1577
1578unsigned long __weak paddr_vmcoreinfo_note(void)
1579{
1580 return __pa((unsigned long)(char *)&vmcoreinfo_note);
1581}
1582
/*
 * Populate the vmcoreinfo data with the symbols, structure sizes, member
 * offsets, array lengths and numeric constants listed below, let the
 * architecture add its own entries, and then build the vmcoreinfo ELF
 * note.  Registered as a subsys_initcall; always returns 0.
 */
static int __init crash_save_vmcoreinfo_init(void)
{
	/* kernel release string and page size */
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	/* symbols */
	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	/* structure and type sizes */
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	/* member offsets */
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _count);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	/* array lengths; the log buffer setup call sits between them */
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	/* numeric constants */
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_HUGETLBFS
	VMCOREINFO_SYMBOL(free_huge_page);
#endif

	/* architecture additions first, then (re)build the ELF note */
	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}
1656
1657subsys_initcall(crash_save_vmcoreinfo_init);
1658
/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 *
 * Returns -EBUSY when kexec_mutex cannot be taken or when processes
 * cannot be frozen, -EINVAL when no image is loaded, otherwise the error
 * of the suspend step that failed (0 if none failed).
 */
int kernel_kexec(void)
{
	int error = 0;

	/* Only one kexec operation at a time; do not wait for the lock. */
	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	/*
	 * Context-preserving jump: suspend step by step.  Each failure
	 * jumps to the label below that undoes the steps taken so far.
	 */
	if (kexec_image->preserve_context) {
		lock_system_sleep();
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_end(). We *must* call
		 * dpm_suspend_end() now.  Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_end(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		/* Plain kexec: go through the restart preparation path. */
		kexec_in_progress = true;
		kernel_restart_prepare(NULL);
		migrate_to_reboot_cpu();

		/*
		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
		 * no further code needs to use CPU hotplug (which is true in
		 * the reboot case). However, the kexec path depends on using
		 * CPU hotplug again; so re-enable it here.
		 */
		cpu_hotplug_enable();
		pr_emerg("Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	/*
	 * Resume sequence for the context-preserving case.  The labels are
	 * also the targets of the error paths above, so the order of the
	 * statements here mirrors the suspend order in reverse.
	 */
	if (kexec_image->preserve_context) {
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_start(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		unlock_system_sleep();
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
new file mode 100644
index 000000000000..9aa25c034b2e
--- /dev/null
+++ b/kernel/kexec_core.c
@@ -0,0 +1,1511 @@
1/*
2 * kexec.c - kexec system call core code.
3 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
4 *
5 * This source code is licensed under the GNU General Public License,
6 * Version 2. See the file COPYING for more details.
7 */
8
9#define pr_fmt(fmt) "kexec: " fmt
10
11#include <linux/capability.h>
12#include <linux/mm.h>
13#include <linux/file.h>
14#include <linux/slab.h>
15#include <linux/fs.h>
16#include <linux/kexec.h>
17#include <linux/mutex.h>
18#include <linux/list.h>
19#include <linux/highmem.h>
20#include <linux/syscalls.h>
21#include <linux/reboot.h>
22#include <linux/ioport.h>
23#include <linux/hardirq.h>
24#include <linux/elf.h>
25#include <linux/elfcore.h>
26#include <linux/utsname.h>
27#include <linux/numa.h>
28#include <linux/suspend.h>
29#include <linux/device.h>
30#include <linux/freezer.h>
31#include <linux/pm.h>
32#include <linux/cpu.h>
33#include <linux/uaccess.h>
34#include <linux/io.h>
35#include <linux/console.h>
36#include <linux/vmalloc.h>
37#include <linux/swap.h>
38#include <linux/syscore_ops.h>
39#include <linux/compiler.h>
40#include <linux/hugetlb.h>
41
42#include <asm/page.h>
43#include <asm/sections.h>
44
45#include <crypto/hash.h>
46#include <crypto/sha.h>
47#include "kexec_internal.h"
48
49DEFINE_MUTEX(kexec_mutex);
50
51/* Per cpu memory for storing cpu states in case of system crash. */
52note_buf_t __percpu *crash_notes;
53
54/* vmcoreinfo stuff */
55static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
56u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
57size_t vmcoreinfo_size;
58size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
59
60/* Flag to indicate we are going to kexec a new kernel */
61bool kexec_in_progress = false;
62
63
64/* Location of the reserved area for the crash kernel */
65struct resource crashk_res = {
66 .name = "Crash kernel",
67 .start = 0,
68 .end = 0,
69 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
70};
71struct resource crashk_low_res = {
72 .name = "Crash kernel",
73 .start = 0,
74 .end = 0,
75 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
76};
77
78int kexec_should_crash(struct task_struct *p)
79{
80 /*
81 * If crash_kexec_post_notifiers is enabled, don't run
82 * crash_kexec() here yet, which must be run after panic
83 * notifiers in panic().
84 */
85 if (crash_kexec_post_notifiers)
86 return 0;
87 /*
88 * There are 4 panic() calls in do_exit() path, each of which
89 * corresponds to each of these 4 conditions.
90 */
91 if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
92 return 1;
93 return 0;
94}
95
96/*
97 * When kexec transitions to the new kernel there is a one-to-one
98 * mapping between physical and virtual addresses. On processors
99 * where you can disable the MMU this is trivial, and easy. For
100 * others it is still a simple predictable page table to setup.
101 *
102 * In that environment kexec copies the new kernel to its final
103 * resting place. This means I can only support memory whose
104 * physical address can fit in an unsigned long. In particular
105 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
106 * If the assembly stub has more restrictive requirements
107 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
108 * defined more restrictively in <asm/kexec.h>.
109 *
110 * The code for the transition from the current kernel to the
111 * new kernel is placed in the control_code_buffer, whose size
112 * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
113 * page of memory is necessary, but some architectures require more.
114 * Because this memory must be identity mapped in the transition from
115 * virtual to physical addresses it must live in the range
116 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
117 * modifiable.
118 *
119 * The assembly stub in the control code buffer is passed a linked list
120 * of descriptor pages detailing the source pages of the new kernel,
121 * and the destination addresses of those source pages. As this data
122 * structure is not used in the context of the current OS, it must
123 * be self-contained.
124 *
125 * The code has been made to work with highmem pages and will use a
126 * destination page in its final resting place (if it happens
127 * to allocate it). The end product of this is that most of the
128 * physical address space, and most of RAM can be used.
129 *
130 * Future directions include:
131 * - allocating a page table with the control code buffer identity
132 * mapped, to simplify machine_kexec and make kexec_on_panic more
133 * reliable.
134 */
135
136/*
137 * KIMAGE_NO_DEST is an impossible destination address..., for
138 * allocating pages whose destination address we do not care about.
139 */
140#define KIMAGE_NO_DEST (-1UL)
141
142static struct page *kimage_alloc_page(struct kimage *image,
143 gfp_t gfp_mask,
144 unsigned long dest);
145
146int sanity_check_segment_list(struct kimage *image)
147{
148 int result, i;
149 unsigned long nr_segments = image->nr_segments;
150
151 /*
152 * Verify we have good destination addresses. The caller is
153 * responsible for making certain we don't attempt to load
154 * the new image into invalid or reserved areas of RAM. This
155 * just verifies it is an address we can use.
156 *
157 * Since the kernel does everything in page size chunks ensure
158 * the destination addresses are page aligned. Too many
159 * special cases crop of when we don't do this. The most
160 * insidious is getting overlapping destination addresses
161 * simply because addresses are changed to page size
162 * granularity.
163 */
164 result = -EADDRNOTAVAIL;
165 for (i = 0; i < nr_segments; i++) {
166 unsigned long mstart, mend;
167
168 mstart = image->segment[i].mem;
169 mend = mstart + image->segment[i].memsz;
170 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
171 return result;
172 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
173 return result;
174 }
175
176 /* Verify our destination addresses do not overlap.
177 * If we alloed overlapping destination addresses
178 * through very weird things can happen with no
179 * easy explanation as one segment stops on another.
180 */
181 result = -EINVAL;
182 for (i = 0; i < nr_segments; i++) {
183 unsigned long mstart, mend;
184 unsigned long j;
185
186 mstart = image->segment[i].mem;
187 mend = mstart + image->segment[i].memsz;
188 for (j = 0; j < i; j++) {
189 unsigned long pstart, pend;
190
191 pstart = image->segment[j].mem;
192 pend = pstart + image->segment[j].memsz;
193 /* Do the segments overlap ? */
194 if ((mend > pstart) && (mstart < pend))
195 return result;
196 }
197 }
198
199 /* Ensure our buffer sizes are strictly less than
200 * our memory sizes. This should always be the case,
201 * and it is easier to check up front than to be surprised
202 * later on.
203 */
204 result = -EINVAL;
205 for (i = 0; i < nr_segments; i++) {
206 if (image->segment[i].bufsz > image->segment[i].memsz)
207 return result;
208 }
209
210 /*
211 * Verify we have good destination addresses. Normally
212 * the caller is responsible for making certain we don't
213 * attempt to load the new image into invalid or reserved
214 * areas of RAM. But crash kernels are preloaded into a
215 * reserved area of ram. We must ensure the addresses
216 * are in the reserved area otherwise preloading the
217 * kernel could corrupt things.
218 */
219
220 if (image->type == KEXEC_TYPE_CRASH) {
221 result = -EADDRNOTAVAIL;
222 for (i = 0; i < nr_segments; i++) {
223 unsigned long mstart, mend;
224
225 mstart = image->segment[i].mem;
226 mend = mstart + image->segment[i].memsz - 1;
227 /* Ensure we are within the crash kernel limits */
228 if ((mstart < crashk_res.start) ||
229 (mend > crashk_res.end))
230 return result;
231 }
232 }
233
234 return 0;
235}
236
237struct kimage *do_kimage_alloc_init(void)
238{
239 struct kimage *image;
240
241 /* Allocate a controlling structure */
242 image = kzalloc(sizeof(*image), GFP_KERNEL);
243 if (!image)
244 return NULL;
245
246 image->head = 0;
247 image->entry = &image->head;
248 image->last_entry = &image->head;
249 image->control_page = ~0; /* By default this does not apply */
250 image->type = KEXEC_TYPE_DEFAULT;
251
252 /* Initialize the list of control pages */
253 INIT_LIST_HEAD(&image->control_pages);
254
255 /* Initialize the list of destination pages */
256 INIT_LIST_HEAD(&image->dest_pages);
257
258 /* Initialize the list of unusable pages */
259 INIT_LIST_HEAD(&image->unusable_pages);
260
261 return image;
262}
263
264int kimage_is_destination_range(struct kimage *image,
265 unsigned long start,
266 unsigned long end)
267{
268 unsigned long i;
269
270 for (i = 0; i < image->nr_segments; i++) {
271 unsigned long mstart, mend;
272
273 mstart = image->segment[i].mem;
274 mend = mstart + image->segment[i].memsz;
275 if ((end > mstart) && (start < mend))
276 return 1;
277 }
278
279 return 0;
280}
281
282static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
283{
284 struct page *pages;
285
286 pages = alloc_pages(gfp_mask, order);
287 if (pages) {
288 unsigned int count, i;
289
290 pages->mapping = NULL;
291 set_page_private(pages, order);
292 count = 1 << order;
293 for (i = 0; i < count; i++)
294 SetPageReserved(pages + i);
295 }
296
297 return pages;
298}
299
300static void kimage_free_pages(struct page *page)
301{
302 unsigned int order, count, i;
303
304 order = page_private(page);
305 count = 1 << order;
306 for (i = 0; i < count; i++)
307 ClearPageReserved(page + i);
308 __free_pages(page, order);
309}
310
311void kimage_free_page_list(struct list_head *list)
312{
313 struct list_head *pos, *next;
314
315 list_for_each_safe(pos, next, list) {
316 struct page *page;
317
318 page = list_entry(pos, struct page, lru);
319 list_del(&page->lru);
320 kimage_free_pages(page);
321 }
322}
323
324static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
325 unsigned int order)
326{
327 /* Control pages are special, they are the intermediaries
328 * that are needed while we copy the rest of the pages
329 * to their final resting place. As such they must
330 * not conflict with either the destination addresses
331 * or memory the kernel is already using.
332 *
333 * The only case where we really need more than one of
334 * these are for architectures where we cannot disable
335 * the MMU and must instead generate an identity mapped
336 * page table for all of the memory.
337 *
338 * At worst this runs in O(N) of the image size.
339 */
340 struct list_head extra_pages;
341 struct page *pages;
342 unsigned int count;
343
344 count = 1 << order;
345 INIT_LIST_HEAD(&extra_pages);
346
347 /* Loop while I can allocate a page and the page allocated
348 * is a destination page.
349 */
350 do {
351 unsigned long pfn, epfn, addr, eaddr;
352
353 pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
354 if (!pages)
355 break;
356 pfn = page_to_pfn(pages);
357 epfn = pfn + count;
358 addr = pfn << PAGE_SHIFT;
359 eaddr = epfn << PAGE_SHIFT;
360 if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
361 kimage_is_destination_range(image, addr, eaddr)) {
362 list_add(&pages->lru, &extra_pages);
363 pages = NULL;
364 }
365 } while (!pages);
366
367 if (pages) {
368 /* Remember the allocated page... */
369 list_add(&pages->lru, &image->control_pages);
370
371 /* Because the page is already in it's destination
372 * location we will never allocate another page at
373 * that address. Therefore kimage_alloc_pages
374 * will not return it (again) and we don't need
375 * to give it an entry in image->segment[].
376 */
377 }
378 /* Deal with the destination pages I have inadvertently allocated.
379 *
380 * Ideally I would convert multi-page allocations into single
381 * page allocations, and add everything to image->dest_pages.
382 *
383 * For now it is simpler to just free the pages.
384 */
385 kimage_free_page_list(&extra_pages);
386
387 return pages;
388}
389
390static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
391 unsigned int order)
392{
393 /* Control pages are special, they are the intermediaries
394 * that are needed while we copy the rest of the pages
395 * to their final resting place. As such they must
396 * not conflict with either the destination addresses
397 * or memory the kernel is already using.
398 *
399 * Control pages are also the only pags we must allocate
400 * when loading a crash kernel. All of the other pages
401 * are specified by the segments and we just memcpy
402 * into them directly.
403 *
404 * The only case where we really need more than one of
405 * these are for architectures where we cannot disable
406 * the MMU and must instead generate an identity mapped
407 * page table for all of the memory.
408 *
409 * Given the low demand this implements a very simple
410 * allocator that finds the first hole of the appropriate
411 * size in the reserved memory region, and allocates all
412 * of the memory up to and including the hole.
413 */
414 unsigned long hole_start, hole_end, size;
415 struct page *pages;
416
417 pages = NULL;
418 size = (1 << order) << PAGE_SHIFT;
419 hole_start = (image->control_page + (size - 1)) & ~(size - 1);
420 hole_end = hole_start + size - 1;
421 while (hole_end <= crashk_res.end) {
422 unsigned long i;
423
424 if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
425 break;
426 /* See if I overlap any of the segments */
427 for (i = 0; i < image->nr_segments; i++) {
428 unsigned long mstart, mend;
429
430 mstart = image->segment[i].mem;
431 mend = mstart + image->segment[i].memsz - 1;
432 if ((hole_end >= mstart) && (hole_start <= mend)) {
433 /* Advance the hole to the end of the segment */
434 hole_start = (mend + (size - 1)) & ~(size - 1);
435 hole_end = hole_start + size - 1;
436 break;
437 }
438 }
439 /* If I don't overlap any segments I have found my hole! */
440 if (i == image->nr_segments) {
441 pages = pfn_to_page(hole_start >> PAGE_SHIFT);
442 break;
443 }
444 }
445 if (pages)
446 image->control_page = hole_end;
447
448 return pages;
449}
450
451
452struct page *kimage_alloc_control_pages(struct kimage *image,
453 unsigned int order)
454{
455 struct page *pages = NULL;
456
457 switch (image->type) {
458 case KEXEC_TYPE_DEFAULT:
459 pages = kimage_alloc_normal_control_pages(image, order);
460 break;
461 case KEXEC_TYPE_CRASH:
462 pages = kimage_alloc_crash_control_pages(image, order);
463 break;
464 }
465
466 return pages;
467}
468
469static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
470{
471 if (*image->entry != 0)
472 image->entry++;
473
474 if (image->entry == image->last_entry) {
475 kimage_entry_t *ind_page;
476 struct page *page;
477
478 page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
479 if (!page)
480 return -ENOMEM;
481
482 ind_page = page_address(page);
483 *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
484 image->entry = ind_page;
485 image->last_entry = ind_page +
486 ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
487 }
488 *image->entry = entry;
489 image->entry++;
490 *image->entry = 0;
491
492 return 0;
493}
494
495static int kimage_set_destination(struct kimage *image,
496 unsigned long destination)
497{
498 int result;
499
500 destination &= PAGE_MASK;
501 result = kimage_add_entry(image, destination | IND_DESTINATION);
502
503 return result;
504}
505
506
507static int kimage_add_page(struct kimage *image, unsigned long page)
508{
509 int result;
510
511 page &= PAGE_MASK;
512 result = kimage_add_entry(image, page | IND_SOURCE);
513
514 return result;
515}
516
517
/*
 * Free the pages parked on the image's dest_pages and unusable_pages
 * lists.
 */
static void kimage_free_extra_pages(struct kimage *image)
{
	/* Walk through and free any extra destination pages I may have */
	kimage_free_page_list(&image->dest_pages);

	/* Walk through and free any unusable pages I have cached */
	kimage_free_page_list(&image->unusable_pages);

}
527void kimage_terminate(struct kimage *image)
528{
529 if (*image->entry != 0)
530 image->entry++;
531
532 *image->entry = IND_DONE;
533}
534
/*
 * Walk the entry list of @image: @ptr visits each slot and @entry holds
 * the slot's value.  The walk stops at an empty slot or at an entry with
 * IND_DONE set.  An IND_INDIRECTION entry is itself visited and is then
 * followed to the page it points at.
 *
 * All arguments are parenthesized so that expressions, not just plain
 * identifiers, can be passed safely.
 */
#define for_each_kimage_entry(image, ptr, entry) \
	for ((ptr) = &(image)->head; \
	     ((entry) = *(ptr)) && !((entry) & IND_DONE); \
	     (ptr) = ((entry) & IND_INDIRECTION) ? \
		phys_to_virt(((entry) & PAGE_MASK)) : (ptr) + 1)
539
540static void kimage_free_entry(kimage_entry_t entry)
541{
542 struct page *page;
543
544 page = pfn_to_page(entry >> PAGE_SHIFT);
545 kimage_free_pages(page);
546}
547
548void kimage_free(struct kimage *image)
549{
550 kimage_entry_t *ptr, entry;
551 kimage_entry_t ind = 0;
552
553 if (!image)
554 return;
555
556 kimage_free_extra_pages(image);
557 for_each_kimage_entry(image, ptr, entry) {
558 if (entry & IND_INDIRECTION) {
559 /* Free the previous indirection page */
560 if (ind & IND_INDIRECTION)
561 kimage_free_entry(ind);
562 /* Save this indirection page until we are
563 * done with it.
564 */
565 ind = entry;
566 } else if (entry & IND_SOURCE)
567 kimage_free_entry(entry);
568 }
569 /* Free the final indirection page */
570 if (ind & IND_INDIRECTION)
571 kimage_free_entry(ind);
572
573 /* Handle any machine specific cleanup */
574 machine_kexec_cleanup(image);
575
576 /* Free the kexec control pages... */
577 kimage_free_page_list(&image->control_pages);
578
579 /*
580 * Free up any temporary buffers allocated. This might hit if
581 * error occurred much later after buffer allocation.
582 */
583 if (image->file_mode)
584 kimage_file_post_load_cleanup(image);
585
586 kfree(image);
587}
588
589static kimage_entry_t *kimage_dst_used(struct kimage *image,
590 unsigned long page)
591{
592 kimage_entry_t *ptr, entry;
593 unsigned long destination = 0;
594
595 for_each_kimage_entry(image, ptr, entry) {
596 if (entry & IND_DESTINATION)
597 destination = entry & PAGE_MASK;
598 else if (entry & IND_SOURCE) {
599 if (page == destination)
600 return ptr;
601 destination += PAGE_SIZE;
602 }
603 }
604
605 return NULL;
606}
607
/*
 * Allocate one page that may be used as the source page for the
 * destination address @destination (KIMAGE_NO_DEST when the caller does
 * not care about the destination).  Returns NULL if the page allocator
 * runs out of memory.
 */
static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long destination)
{
	/*
	 * Here we implement safeguards to ensure that a source page
	 * is not copied to its destination page before the data on
	 * the destination page is no longer useful.
	 *
	 * To do this we maintain the invariant that a source page is
	 * either its own destination page, or it is not a
	 * destination page at all.
	 *
	 * That is slightly stronger than required, but the proof
	 * that no problems will occur is trivial, and the
	 * implementation is simple to verify.
	 *
	 * When allocating all pages normally this algorithm will run
	 * in O(N) time, but in the worst case it will run in O(N^2)
	 * time.   If the runtime is a problem the data structures can
	 * be fixed.
	 */
	struct page *page;
	unsigned long addr;

	/*
	 * Walk through the list of destination pages, and see if I
	 * have a match.
	 */
	list_for_each_entry(page, &image->dest_pages, lru) {
		addr = page_to_pfn(page) << PAGE_SHIFT;
		if (addr == destination) {
			list_del(&page->lru);
			return page;
		}
	}
	page = NULL;
	while (1) {
		kimage_entry_t *old;

		/* Allocate a page, if we run out of memory give up */
		page = kimage_alloc_pages(gfp_mask, 0);
		if (!page)
			return NULL;
		/* If the page cannot be used file it away */
		if (page_to_pfn(page) >
				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
			list_add(&page->lru, &image->unusable_pages);
			continue;
		}
		addr = page_to_pfn(page) << PAGE_SHIFT;

		/* If it is the destination page we want use it */
		if (addr == destination)
			break;

		/* If the page is not a destination page use it */
		if (!kimage_is_destination_range(image, addr,
						  addr + PAGE_SIZE))
			break;

		/*
		 * I know that the page is someone's destination page.
		 * See if there is already a source page for this
		 * destination page.  And if so swap the source pages.
		 */
		old = kimage_dst_used(image, addr);
		if (old) {
			/* If so move it */
			unsigned long old_addr;
			struct page *old_page;

			old_addr = *old & PAGE_MASK;
			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
			/* new page takes over the data and the entry slot */
			copy_highpage(page, old_page);
			*old = addr | (*old & ~PAGE_MASK);

			/* The old page I have found cannot be a
			 * destination page, so return it if its
			 * gfp_flags honor the ones passed in.
			 */
			if (!(gfp_mask & __GFP_HIGHMEM) &&
			    PageHighMem(old_page)) {
				kimage_free_pages(old_page);
				continue;
			}
			addr = old_addr;
			page = old_page;
			break;
		}
		/* Place the page on the destination list, to be used later */
		list_add(&page->lru, &image->dest_pages);
	}

	return page;
}
704
705static int kimage_load_normal_segment(struct kimage *image,
706 struct kexec_segment *segment)
707{
708 unsigned long maddr;
709 size_t ubytes, mbytes;
710 int result;
711 unsigned char __user *buf = NULL;
712 unsigned char *kbuf = NULL;
713
714 result = 0;
715 if (image->file_mode)
716 kbuf = segment->kbuf;
717 else
718 buf = segment->buf;
719 ubytes = segment->bufsz;
720 mbytes = segment->memsz;
721 maddr = segment->mem;
722
723 result = kimage_set_destination(image, maddr);
724 if (result < 0)
725 goto out;
726
727 while (mbytes) {
728 struct page *page;
729 char *ptr;
730 size_t uchunk, mchunk;
731
732 page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
733 if (!page) {
734 result = -ENOMEM;
735 goto out;
736 }
737 result = kimage_add_page(image, page_to_pfn(page)
738 << PAGE_SHIFT);
739 if (result < 0)
740 goto out;
741
742 ptr = kmap(page);
743 /* Start with a clear page */
744 clear_page(ptr);
745 ptr += maddr & ~PAGE_MASK;
746 mchunk = min_t(size_t, mbytes,
747 PAGE_SIZE - (maddr & ~PAGE_MASK));
748 uchunk = min(ubytes, mchunk);
749
750 /* For file based kexec, source pages are in kernel memory */
751 if (image->file_mode)
752 memcpy(ptr, kbuf, uchunk);
753 else
754 result = copy_from_user(ptr, buf, uchunk);
755 kunmap(page);
756 if (result) {
757 result = -EFAULT;
758 goto out;
759 }
760 ubytes -= uchunk;
761 maddr += mchunk;
762 if (image->file_mode)
763 kbuf += mchunk;
764 else
765 buf += mchunk;
766 mbytes -= mchunk;
767 }
768out:
769 return result;
770}
771
772static int kimage_load_crash_segment(struct kimage *image,
773 struct kexec_segment *segment)
774{
775 /* For crash dumps kernels we simply copy the data from
776 * user space to it's destination.
777 * We do things a page at a time for the sake of kmap.
778 */
779 unsigned long maddr;
780 size_t ubytes, mbytes;
781 int result;
782 unsigned char __user *buf = NULL;
783 unsigned char *kbuf = NULL;
784
785 result = 0;
786 if (image->file_mode)
787 kbuf = segment->kbuf;
788 else
789 buf = segment->buf;
790 ubytes = segment->bufsz;
791 mbytes = segment->memsz;
792 maddr = segment->mem;
793 while (mbytes) {
794 struct page *page;
795 char *ptr;
796 size_t uchunk, mchunk;
797
798 page = pfn_to_page(maddr >> PAGE_SHIFT);
799 if (!page) {
800 result = -ENOMEM;
801 goto out;
802 }
803 ptr = kmap(page);
804 ptr += maddr & ~PAGE_MASK;
805 mchunk = min_t(size_t, mbytes,
806 PAGE_SIZE - (maddr & ~PAGE_MASK));
807 uchunk = min(ubytes, mchunk);
808 if (mchunk > uchunk) {
809 /* Zero the trailing part of the page */
810 memset(ptr + uchunk, 0, mchunk - uchunk);
811 }
812
813 /* For file based kexec, source pages are in kernel memory */
814 if (image->file_mode)
815 memcpy(ptr, kbuf, uchunk);
816 else
817 result = copy_from_user(ptr, buf, uchunk);
818 kexec_flush_icache_page(page);
819 kunmap(page);
820 if (result) {
821 result = -EFAULT;
822 goto out;
823 }
824 ubytes -= uchunk;
825 maddr += mchunk;
826 if (image->file_mode)
827 kbuf += mchunk;
828 else
829 buf += mchunk;
830 mbytes -= mchunk;
831 }
832out:
833 return result;
834}
835
836int kimage_load_segment(struct kimage *image,
837 struct kexec_segment *segment)
838{
839 int result = -ENOMEM;
840
841 switch (image->type) {
842 case KEXEC_TYPE_DEFAULT:
843 result = kimage_load_normal_segment(image, segment);
844 break;
845 case KEXEC_TYPE_CRASH:
846 result = kimage_load_crash_segment(image, segment);
847 break;
848 }
849
850 return result;
851}
852
853struct kimage *kexec_image;
854struct kimage *kexec_crash_image;
855int kexec_load_disabled;
856
857void crash_kexec(struct pt_regs *regs)
858{
859 /* Take the kexec_mutex here to prevent sys_kexec_load
860 * running on one cpu from replacing the crash kernel
861 * we are using after a panic on a different cpu.
862 *
863 * If the crash kernel was not located in a fixed area
864 * of memory the xchg(&kexec_crash_image) would be
865 * sufficient. But since I reuse the memory...
866 */
867 if (mutex_trylock(&kexec_mutex)) {
868 if (kexec_crash_image) {
869 struct pt_regs fixed_regs;
870
871 crash_setup_regs(&fixed_regs, regs);
872 crash_save_vmcoreinfo();
873 machine_crash_shutdown(&fixed_regs);
874 machine_kexec(kexec_crash_image);
875 }
876 mutex_unlock(&kexec_mutex);
877 }
878}
879
880size_t crash_get_memory_size(void)
881{
882 size_t size = 0;
883
884 mutex_lock(&kexec_mutex);
885 if (crashk_res.end != crashk_res.start)
886 size = resource_size(&crashk_res);
887 mutex_unlock(&kexec_mutex);
888 return size;
889}
890
891void __weak crash_free_reserved_phys_range(unsigned long begin,
892 unsigned long end)
893{
894 unsigned long addr;
895
896 for (addr = begin; addr < end; addr += PAGE_SIZE)
897 free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
898}
899
900int crash_shrink_memory(unsigned long new_size)
901{
902 int ret = 0;
903 unsigned long start, end;
904 unsigned long old_size;
905 struct resource *ram_res;
906
907 mutex_lock(&kexec_mutex);
908
909 if (kexec_crash_image) {
910 ret = -ENOENT;
911 goto unlock;
912 }
913 start = crashk_res.start;
914 end = crashk_res.end;
915 old_size = (end == 0) ? 0 : end - start + 1;
916 if (new_size >= old_size) {
917 ret = (new_size == old_size) ? 0 : -EINVAL;
918 goto unlock;
919 }
920
921 ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
922 if (!ram_res) {
923 ret = -ENOMEM;
924 goto unlock;
925 }
926
927 start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
928 end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
929
930 crash_map_reserved_pages();
931 crash_free_reserved_phys_range(end, crashk_res.end);
932
933 if ((start == end) && (crashk_res.parent != NULL))
934 release_resource(&crashk_res);
935
936 ram_res->start = end;
937 ram_res->end = crashk_res.end;
938 ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
939 ram_res->name = "System RAM";
940
941 crashk_res.end = end - 1;
942
943 insert_resource(&iomem_resource, ram_res);
944 crash_unmap_reserved_pages();
945
946unlock:
947 mutex_unlock(&kexec_mutex);
948 return ret;
949}
950
951static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
952 size_t data_len)
953{
954 struct elf_note note;
955
956 note.n_namesz = strlen(name) + 1;
957 note.n_descsz = data_len;
958 note.n_type = type;
959 memcpy(buf, &note, sizeof(note));
960 buf += (sizeof(note) + 3)/4;
961 memcpy(buf, name, note.n_namesz);
962 buf += (note.n_namesz + 3)/4;
963 memcpy(buf, data, note.n_descsz);
964 buf += (note.n_descsz + 3)/4;
965
966 return buf;
967}
968
969static void final_note(u32 *buf)
970{
971 struct elf_note note;
972
973 note.n_namesz = 0;
974 note.n_descsz = 0;
975 note.n_type = 0;
976 memcpy(buf, &note, sizeof(note));
977}
978
979void crash_save_cpu(struct pt_regs *regs, int cpu)
980{
981 struct elf_prstatus prstatus;
982 u32 *buf;
983
984 if ((cpu < 0) || (cpu >= nr_cpu_ids))
985 return;
986
987 /* Using ELF notes here is opportunistic.
988 * I need a well defined structure format
989 * for the data I pass, and I need tags
990 * on the data to indicate what information I have
991 * squirrelled away. ELF notes happen to provide
992 * all of that, so there is no need to invent something new.
993 */
994 buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
995 if (!buf)
996 return;
997 memset(&prstatus, 0, sizeof(prstatus));
998 prstatus.pr_pid = current->pid;
999 elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
1000 buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
1001 &prstatus, sizeof(prstatus));
1002 final_note(buf);
1003}
1004
1005static int __init crash_notes_memory_init(void)
1006{
1007 /* Allocate memory for saving cpu registers. */
1008 crash_notes = alloc_percpu(note_buf_t);
1009 if (!crash_notes) {
1010 pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
1011 return -ENOMEM;
1012 }
1013 return 0;
1014}
1015subsys_initcall(crash_notes_memory_init);
1016
1017
1018/*
1019 * parsing the "crashkernel" commandline
1020 *
1021 * this code is intended to be called from architecture specific code
1022 */
1023
1024
1025/*
1026 * This function parses command lines in the format
1027 *
1028 * crashkernel=ramsize-range:size[,...][@offset]
1029 *
1030 * The function returns 0 on success and -EINVAL on failure.
1031 */
1032static int __init parse_crashkernel_mem(char *cmdline,
1033 unsigned long long system_ram,
1034 unsigned long long *crash_size,
1035 unsigned long long *crash_base)
1036{
1037 char *cur = cmdline, *tmp;
1038
1039 /* for each entry of the comma-separated list */
1040 do {
1041 unsigned long long start, end = ULLONG_MAX, size;
1042
1043 /* get the start of the range */
1044 start = memparse(cur, &tmp);
1045 if (cur == tmp) {
1046 pr_warn("crashkernel: Memory value expected\n");
1047 return -EINVAL;
1048 }
1049 cur = tmp;
1050 if (*cur != '-') {
1051 pr_warn("crashkernel: '-' expected\n");
1052 return -EINVAL;
1053 }
1054 cur++;
1055
1056 /* if no ':' is here, than we read the end */
1057 if (*cur != ':') {
1058 end = memparse(cur, &tmp);
1059 if (cur == tmp) {
1060 pr_warn("crashkernel: Memory value expected\n");
1061 return -EINVAL;
1062 }
1063 cur = tmp;
1064 if (end <= start) {
1065 pr_warn("crashkernel: end <= start\n");
1066 return -EINVAL;
1067 }
1068 }
1069
1070 if (*cur != ':') {
1071 pr_warn("crashkernel: ':' expected\n");
1072 return -EINVAL;
1073 }
1074 cur++;
1075
1076 size = memparse(cur, &tmp);
1077 if (cur == tmp) {
1078 pr_warn("Memory value expected\n");
1079 return -EINVAL;
1080 }
1081 cur = tmp;
1082 if (size >= system_ram) {
1083 pr_warn("crashkernel: invalid size\n");
1084 return -EINVAL;
1085 }
1086
1087 /* match ? */
1088 if (system_ram >= start && system_ram < end) {
1089 *crash_size = size;
1090 break;
1091 }
1092 } while (*cur++ == ',');
1093
1094 if (*crash_size > 0) {
1095 while (*cur && *cur != ' ' && *cur != '@')
1096 cur++;
1097 if (*cur == '@') {
1098 cur++;
1099 *crash_base = memparse(cur, &tmp);
1100 if (cur == tmp) {
1101 pr_warn("Memory value expected after '@'\n");
1102 return -EINVAL;
1103 }
1104 }
1105 }
1106
1107 return 0;
1108}
1109
1110/*
1111 * That function parses "simple" (old) crashkernel command lines like
1112 *
1113 * crashkernel=size[@offset]
1114 *
1115 * It returns 0 on success and -EINVAL on failure.
1116 */
1117static int __init parse_crashkernel_simple(char *cmdline,
1118 unsigned long long *crash_size,
1119 unsigned long long *crash_base)
1120{
1121 char *cur = cmdline;
1122
1123 *crash_size = memparse(cmdline, &cur);
1124 if (cmdline == cur) {
1125 pr_warn("crashkernel: memory value expected\n");
1126 return -EINVAL;
1127 }
1128
1129 if (*cur == '@')
1130 *crash_base = memparse(cur+1, &cur);
1131 else if (*cur != ' ' && *cur != '\0') {
1132 pr_warn("crashkernel: unrecognized char\n");
1133 return -EINVAL;
1134 }
1135
1136 return 0;
1137}
1138
1139#define SUFFIX_HIGH 0
1140#define SUFFIX_LOW 1
1141#define SUFFIX_NULL 2
1142static __initdata char *suffix_tbl[] = {
1143 [SUFFIX_HIGH] = ",high",
1144 [SUFFIX_LOW] = ",low",
1145 [SUFFIX_NULL] = NULL,
1146};
1147
1148/*
1149 * That function parses "suffix" crashkernel command lines like
1150 *
1151 * crashkernel=size,[high|low]
1152 *
1153 * It returns 0 on success and -EINVAL on failure.
1154 */
1155static int __init parse_crashkernel_suffix(char *cmdline,
1156 unsigned long long *crash_size,
1157 const char *suffix)
1158{
1159 char *cur = cmdline;
1160
1161 *crash_size = memparse(cmdline, &cur);
1162 if (cmdline == cur) {
1163 pr_warn("crashkernel: memory value expected\n");
1164 return -EINVAL;
1165 }
1166
1167 /* check with suffix */
1168 if (strncmp(cur, suffix, strlen(suffix))) {
1169 pr_warn("crashkernel: unrecognized char\n");
1170 return -EINVAL;
1171 }
1172 cur += strlen(suffix);
1173 if (*cur != ' ' && *cur != '\0') {
1174 pr_warn("crashkernel: unrecognized char\n");
1175 return -EINVAL;
1176 }
1177
1178 return 0;
1179}
1180
1181static __init char *get_last_crashkernel(char *cmdline,
1182 const char *name,
1183 const char *suffix)
1184{
1185 char *p = cmdline, *ck_cmdline = NULL;
1186
1187 /* find crashkernel and use the last one if there are more */
1188 p = strstr(p, name);
1189 while (p) {
1190 char *end_p = strchr(p, ' ');
1191 char *q;
1192
1193 if (!end_p)
1194 end_p = p + strlen(p);
1195
1196 if (!suffix) {
1197 int i;
1198
1199 /* skip the one with any known suffix */
1200 for (i = 0; suffix_tbl[i]; i++) {
1201 q = end_p - strlen(suffix_tbl[i]);
1202 if (!strncmp(q, suffix_tbl[i],
1203 strlen(suffix_tbl[i])))
1204 goto next;
1205 }
1206 ck_cmdline = p;
1207 } else {
1208 q = end_p - strlen(suffix);
1209 if (!strncmp(q, suffix, strlen(suffix)))
1210 ck_cmdline = p;
1211 }
1212next:
1213 p = strstr(p+1, name);
1214 }
1215
1216 if (!ck_cmdline)
1217 return NULL;
1218
1219 return ck_cmdline;
1220}
1221
1222static int __init __parse_crashkernel(char *cmdline,
1223 unsigned long long system_ram,
1224 unsigned long long *crash_size,
1225 unsigned long long *crash_base,
1226 const char *name,
1227 const char *suffix)
1228{
1229 char *first_colon, *first_space;
1230 char *ck_cmdline;
1231
1232 BUG_ON(!crash_size || !crash_base);
1233 *crash_size = 0;
1234 *crash_base = 0;
1235
1236 ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
1237
1238 if (!ck_cmdline)
1239 return -EINVAL;
1240
1241 ck_cmdline += strlen(name);
1242
1243 if (suffix)
1244 return parse_crashkernel_suffix(ck_cmdline, crash_size,
1245 suffix);
1246 /*
1247 * if the commandline contains a ':', then that's the extended
1248 * syntax -- if not, it must be the classic syntax
1249 */
1250 first_colon = strchr(ck_cmdline, ':');
1251 first_space = strchr(ck_cmdline, ' ');
1252 if (first_colon && (!first_space || first_colon < first_space))
1253 return parse_crashkernel_mem(ck_cmdline, system_ram,
1254 crash_size, crash_base);
1255
1256 return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
1257}
1258
1259/*
1260 * That function is the entry point for command line parsing and should be
1261 * called from the arch-specific code.
1262 */
1263int __init parse_crashkernel(char *cmdline,
1264 unsigned long long system_ram,
1265 unsigned long long *crash_size,
1266 unsigned long long *crash_base)
1267{
1268 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1269 "crashkernel=", NULL);
1270}
1271
1272int __init parse_crashkernel_high(char *cmdline,
1273 unsigned long long system_ram,
1274 unsigned long long *crash_size,
1275 unsigned long long *crash_base)
1276{
1277 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1278 "crashkernel=", suffix_tbl[SUFFIX_HIGH]);
1279}
1280
1281int __init parse_crashkernel_low(char *cmdline,
1282 unsigned long long system_ram,
1283 unsigned long long *crash_size,
1284 unsigned long long *crash_base)
1285{
1286 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1287 "crashkernel=", suffix_tbl[SUFFIX_LOW]);
1288}
1289
1290static void update_vmcoreinfo_note(void)
1291{
1292 u32 *buf = vmcoreinfo_note;
1293
1294 if (!vmcoreinfo_size)
1295 return;
1296 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1297 vmcoreinfo_size);
1298 final_note(buf);
1299}
1300
/*
 * Append a "CRASHTIME=<seconds>" line, taken from get_seconds(), to
 * the vmcoreinfo data and regenerate the vmcoreinfo note.
 */
void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());

	update_vmcoreinfo_note();
}
1306
1307void vmcoreinfo_append_str(const char *fmt, ...)
1308{
1309 va_list args;
1310 char buf[0x50];
1311 size_t r;
1312
1313 va_start(args, fmt);
1314 r = vscnprintf(buf, sizeof(buf), fmt, args);
1315 va_end(args);
1316
1317 r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
1318
1319 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1320
1321 vmcoreinfo_size += r;
1322}
1323
1324/*
1325 * provide an empty default implementation here -- architecture
1326 * code may override this
1327 */
1328void __weak arch_crash_save_vmcoreinfo(void)
1329{}
1330
1331unsigned long __weak paddr_vmcoreinfo_note(void)
1332{
1333 return __pa((unsigned long)(char *)&vmcoreinfo_note);
1334}
1335
1336static int __init crash_save_vmcoreinfo_init(void)
1337{
1338 VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
1339 VMCOREINFO_PAGESIZE(PAGE_SIZE);
1340
1341 VMCOREINFO_SYMBOL(init_uts_ns);
1342 VMCOREINFO_SYMBOL(node_online_map);
1343#ifdef CONFIG_MMU
1344 VMCOREINFO_SYMBOL(swapper_pg_dir);
1345#endif
1346 VMCOREINFO_SYMBOL(_stext);
1347 VMCOREINFO_SYMBOL(vmap_area_list);
1348
1349#ifndef CONFIG_NEED_MULTIPLE_NODES
1350 VMCOREINFO_SYMBOL(mem_map);
1351 VMCOREINFO_SYMBOL(contig_page_data);
1352#endif
1353#ifdef CONFIG_SPARSEMEM
1354 VMCOREINFO_SYMBOL(mem_section);
1355 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
1356 VMCOREINFO_STRUCT_SIZE(mem_section);
1357 VMCOREINFO_OFFSET(mem_section, section_mem_map);
1358#endif
1359 VMCOREINFO_STRUCT_SIZE(page);
1360 VMCOREINFO_STRUCT_SIZE(pglist_data);
1361 VMCOREINFO_STRUCT_SIZE(zone);
1362 VMCOREINFO_STRUCT_SIZE(free_area);
1363 VMCOREINFO_STRUCT_SIZE(list_head);
1364 VMCOREINFO_SIZE(nodemask_t);
1365 VMCOREINFO_OFFSET(page, flags);
1366 VMCOREINFO_OFFSET(page, _count);
1367 VMCOREINFO_OFFSET(page, mapping);
1368 VMCOREINFO_OFFSET(page, lru);
1369 VMCOREINFO_OFFSET(page, _mapcount);
1370 VMCOREINFO_OFFSET(page, private);
1371 VMCOREINFO_OFFSET(pglist_data, node_zones);
1372 VMCOREINFO_OFFSET(pglist_data, nr_zones);
1373#ifdef CONFIG_FLAT_NODE_MEM_MAP
1374 VMCOREINFO_OFFSET(pglist_data, node_mem_map);
1375#endif
1376 VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
1377 VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
1378 VMCOREINFO_OFFSET(pglist_data, node_id);
1379 VMCOREINFO_OFFSET(zone, free_area);
1380 VMCOREINFO_OFFSET(zone, vm_stat);
1381 VMCOREINFO_OFFSET(zone, spanned_pages);
1382 VMCOREINFO_OFFSET(free_area, free_list);
1383 VMCOREINFO_OFFSET(list_head, next);
1384 VMCOREINFO_OFFSET(list_head, prev);
1385 VMCOREINFO_OFFSET(vmap_area, va_start);
1386 VMCOREINFO_OFFSET(vmap_area, list);
1387 VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
1388 log_buf_kexec_setup();
1389 VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
1390 VMCOREINFO_NUMBER(NR_FREE_PAGES);
1391 VMCOREINFO_NUMBER(PG_lru);
1392 VMCOREINFO_NUMBER(PG_private);
1393 VMCOREINFO_NUMBER(PG_swapcache);
1394 VMCOREINFO_NUMBER(PG_slab);
1395#ifdef CONFIG_MEMORY_FAILURE
1396 VMCOREINFO_NUMBER(PG_hwpoison);
1397#endif
1398 VMCOREINFO_NUMBER(PG_head_mask);
1399 VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
1400#ifdef CONFIG_HUGETLBFS
1401 VMCOREINFO_SYMBOL(free_huge_page);
1402#endif
1403
1404 arch_crash_save_vmcoreinfo();
1405 update_vmcoreinfo_note();
1406
1407 return 0;
1408}
1409
1410subsys_initcall(crash_save_vmcoreinfo_init);
1411
1412/*
1413 * Move into place and start executing a preloaded standalone
1414 * executable. If nothing was preloaded return an error.
1415 */
1416int kernel_kexec(void)
1417{
1418 int error = 0;
1419
1420 if (!mutex_trylock(&kexec_mutex))
1421 return -EBUSY;
1422 if (!kexec_image) {
1423 error = -EINVAL;
1424 goto Unlock;
1425 }
1426
1427#ifdef CONFIG_KEXEC_JUMP
1428 if (kexec_image->preserve_context) {
1429 lock_system_sleep();
1430 pm_prepare_console();
1431 error = freeze_processes();
1432 if (error) {
1433 error = -EBUSY;
1434 goto Restore_console;
1435 }
1436 suspend_console();
1437 error = dpm_suspend_start(PMSG_FREEZE);
1438 if (error)
1439 goto Resume_console;
1440 /* At this point, dpm_suspend_start() has been called,
1441 * but *not* dpm_suspend_end(). We *must* call
1442 * dpm_suspend_end() now. Otherwise, drivers for
1443 * some devices (e.g. interrupt controllers) become
1444 * desynchronized with the actual state of the
1445 * hardware at resume time, and evil weirdness ensues.
1446 */
1447 error = dpm_suspend_end(PMSG_FREEZE);
1448 if (error)
1449 goto Resume_devices;
1450 error = disable_nonboot_cpus();
1451 if (error)
1452 goto Enable_cpus;
1453 local_irq_disable();
1454 error = syscore_suspend();
1455 if (error)
1456 goto Enable_irqs;
1457 } else
1458#endif
1459 {
1460 kexec_in_progress = true;
1461 kernel_restart_prepare(NULL);
1462 migrate_to_reboot_cpu();
1463
1464 /*
1465 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
1466 * no further code needs to use CPU hotplug (which is true in
1467 * the reboot case). However, the kexec path depends on using
1468 * CPU hotplug again; so re-enable it here.
1469 */
1470 cpu_hotplug_enable();
1471 pr_emerg("Starting new kernel\n");
1472 machine_shutdown();
1473 }
1474
1475 machine_kexec(kexec_image);
1476
1477#ifdef CONFIG_KEXEC_JUMP
1478 if (kexec_image->preserve_context) {
1479 syscore_resume();
1480 Enable_irqs:
1481 local_irq_enable();
1482 Enable_cpus:
1483 enable_nonboot_cpus();
1484 dpm_resume_start(PMSG_RESTORE);
1485 Resume_devices:
1486 dpm_resume_end(PMSG_RESTORE);
1487 Resume_console:
1488 resume_console();
1489 thaw_processes();
1490 Restore_console:
1491 pm_restore_console();
1492 unlock_system_sleep();
1493 }
1494#endif
1495
1496 Unlock:
1497 mutex_unlock(&kexec_mutex);
1498 return error;
1499}
1500
1501/*
1502 * Add and remove page tables for crashkernel memory
1503 *
1504 * Provide an empty default implementation here -- architecture
1505 * code may override this
1506 */
1507void __weak crash_map_reserved_pages(void)
1508{}
1509
1510void __weak crash_unmap_reserved_pages(void)
1511{}
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 6683ccef9fff..e83b26464061 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -90,7 +90,7 @@ static ssize_t profiling_store(struct kobject *kobj,
90KERNEL_ATTR_RW(profiling); 90KERNEL_ATTR_RW(profiling);
91#endif 91#endif
92 92
93#ifdef CONFIG_KEXEC 93#ifdef CONFIG_KEXEC_CORE
94static ssize_t kexec_loaded_show(struct kobject *kobj, 94static ssize_t kexec_loaded_show(struct kobject *kobj,
95 struct kobj_attribute *attr, char *buf) 95 struct kobj_attribute *attr, char *buf)
96{ 96{
@@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
134} 134}
135KERNEL_ATTR_RO(vmcoreinfo); 135KERNEL_ATTR_RO(vmcoreinfo);
136 136
137#endif /* CONFIG_KEXEC */ 137#endif /* CONFIG_KEXEC_CORE */
138 138
139/* whether file capabilities are enabled */ 139/* whether file capabilities are enabled */
140static ssize_t fscaps_show(struct kobject *kobj, 140static ssize_t fscaps_show(struct kobject *kobj,
@@ -196,7 +196,7 @@ static struct attribute * kernel_attrs[] = {
196#ifdef CONFIG_PROFILING 196#ifdef CONFIG_PROFILING
197 &profiling_attr.attr, 197 &profiling_attr.attr,
198#endif 198#endif
199#ifdef CONFIG_KEXEC 199#ifdef CONFIG_KEXEC_CORE
200 &kexec_loaded_attr.attr, 200 &kexec_loaded_attr.attr,
201 &kexec_crash_loaded_attr.attr, 201 &kexec_crash_loaded_attr.attr,
202 &kexec_crash_size_attr.attr, 202 &kexec_crash_size_attr.attr,
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index cf8c24203368..8f0324ef72ab 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -835,7 +835,7 @@ const struct file_operations kmsg_fops = {
835 .release = devkmsg_release, 835 .release = devkmsg_release,
836}; 836};
837 837
838#ifdef CONFIG_KEXEC 838#ifdef CONFIG_KEXEC_CORE
839/* 839/*
840 * This appends the listed symbols to /proc/vmcore 840 * This appends the listed symbols to /proc/vmcore
841 * 841 *
diff --git a/kernel/reboot.c b/kernel/reboot.c
index d20c85d9f8c0..bd30a973fe94 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -346,7 +346,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
346 kernel_restart(buffer); 346 kernel_restart(buffer);
347 break; 347 break;
348 348
349#ifdef CONFIG_KEXEC 349#ifdef CONFIG_KEXEC_CORE
350 case LINUX_REBOOT_CMD_KEXEC: 350 case LINUX_REBOOT_CMD_KEXEC:
351 ret = kernel_kexec(); 351 ret = kernel_kexec();
352 break; 352 break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 19b62b522158..715cc57cc66a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -621,7 +621,7 @@ static struct ctl_table kern_table[] = {
621 .proc_handler = proc_dointvec, 621 .proc_handler = proc_dointvec,
622 }, 622 },
623#endif 623#endif
624#ifdef CONFIG_KEXEC 624#ifdef CONFIG_KEXEC_CORE
625 { 625 {
626 .procname = "kexec_load_disabled", 626 .procname = "kexec_load_disabled",
627 .data = &kexec_load_disabled, 627 .data = &kexec_load_disabled,