author    | Dave Young <dyoung@redhat.com>                 | 2015-09-09 18:38:55 -0400
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-10 16:29:01 -0400
commit    | 2965faa5e03d1e71e9ff9aa143fff39e0a77543a (patch)
tree      | 78b12008d7078a9cd40e157d5b18b795b14d5d9c
parent    | a43cac0d9dc2073ff2245a171429ddbe1accece7 (diff)
kexec: split kexec_load syscall from kexec core code
There are two kexec load syscalls: kexec_load and kexec_file_load.
kexec_file_load has already been split out into kernel/kexec_file.c.  This
patch splits the kexec_load syscall code out into kernel/kexec.c.

It also adds a new Kconfig option, KEXEC_CORE, so that kexec_load can be
disabled and only kexec_file_load used, or vice versa.

The original requirement came from Ted Ts'o: with CONFIG_KEXEC_VERIFY_SIG
enabled, the kexec kernel's signature should always be checked.  But
kexec-tools can bypass that check by using the kexec_load syscall.

Vivek Goyal proposed creating a common Kconfig option so that users can
compile in only one of the two load syscalls.  KEXEC and KEXEC_FILE each
select KEXEC_CORE, so old config files still work.

Because generic code now depends on CONFIG_KEXEC_CORE, every architecture
Kconfig is updated so that KEXEC selects the new KEXEC_CORE option, and
generic kernel code that was conditional on CONFIG_KEXEC is switched over
to CONFIG_KEXEC_CORE.
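
A minimal sketch of the resulting Kconfig shape, condensed from the hunks
below (help texts and arch-specific dependencies elided):

    # arch/Kconfig: hidden core option, never enabled directly by the user
    config KEXEC_CORE
    	bool

    # arch/<arch>/Kconfig: each user-visible syscall option pulls in the core
    config KEXEC
    	bool "kexec system call"
    	select KEXEC_CORE

    # arch/x86/Kconfig: KEXEC_FILE also selects the core and drops its old
    # "depends on KEXEC", so either syscall can be built without the other
    config KEXEC_FILE
    	bool "kexec file based system call"
    	select KEXEC_CORE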
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Dave Young <dyoung@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Josh Boyer <jwboyer@fedoraproject.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
32 files changed, 1560 insertions(+), 1527 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 8f3564930580..4e949e58b192 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -2,6 +2,9 @@
 # General architecture dependent options
 #
 
+config KEXEC_CORE
+	bool
+
 config OPROFILE
 	tristate "OProfile system profiling"
 	depends on PROFILING
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0d1b717e1eca..72ad724c67ae 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2020,6 +2020,7 @@ config KEXEC
 	bool "Kexec system call (EXPERIMENTAL)"
 	depends on (!SMP || PM_SLEEP_SMP)
 	depends on !CPU_V7M
+	select KEXEC_CORE
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 42a91a7aa2b0..eb0249e37981 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -518,6 +518,7 @@ source "drivers/sn/Kconfig"
 config KEXEC
 	bool "kexec system call"
 	depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+	select KEXEC_CORE
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 2dd8f63bfbbb..498b567f007b 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -95,6 +95,7 @@ config MMU_SUN3
 config KEXEC
 	bool "kexec system call"
 	depends on M68KCLASSIC
+	select KEXEC_CORE
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 752acca8de1f..e3aa5b0b4ef1 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2597,6 +2597,7 @@ source "kernel/Kconfig.preempt"
 
 config KEXEC
 	bool "Kexec system call"
+	select KEXEC_CORE
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b447918b9e2c..9a7057ec2154 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -420,6 +420,7 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
 config KEXEC
 	bool "kexec system call"
 	depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
+	select KEXEC_CORE
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 4827870f7a6d..1d57000b1b24 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -48,6 +48,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 
 config KEXEC
 	def_bool y
+	select KEXEC_CORE
 
 config AUDIT_ARCH
 	def_bool y
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 50057fed819d..d514df7e04dd 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -602,6 +602,7 @@ source kernel/Kconfig.hz
 config KEXEC
 	bool "kexec system call (EXPERIMENTAL)"
 	depends on SUPERH32 && MMU
+	select KEXEC_CORE
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 2ba12d761723..106c21bd7f44 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -205,6 +205,7 @@ source "kernel/Kconfig.hz"
 
 config KEXEC
 	bool "kexec system call"
+	select KEXEC_CORE
 	---help---
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc0d73eac047..7aef2d52daa0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1754,6 +1754,7 @@ source kernel/Kconfig.hz
 
 config KEXEC
 	bool "kexec system call"
+	select KEXEC_CORE
 	---help---
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
@@ -1770,8 +1771,8 @@ config KEXEC
 
 config KEXEC_FILE
 	bool "kexec file based system call"
+	select KEXEC_CORE
 	select BUILD_BIN2C
-	depends on KEXEC
 	depends on X86_64
 	depends on CRYPTO=y
 	depends on CRYPTO_SHA256=y
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 16ef02596db2..2d6b309c8e9a 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -414,7 +414,7 @@ xloadflags:
 # define XLF23 0
 #endif
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC)
+#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC_CORE)
 # define XLF4 XLF_EFI_KEXEC
 #else
 # define XLF4 0
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 32ce71375b21..b130d59406fb 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -29,7 +29,7 @@ extern void show_trace(struct task_struct *t, struct pt_regs *regs,
 extern void __show_regs(struct pt_regs *regs, int all);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 extern int in_crash_kexec;
 #else
 /* no crash dump is ever in progress if no crash kernel can be kexec'd */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9ffdf25e5b86..b1b78ffe01d0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -71,8 +71,8 @@ obj-$(CONFIG_LIVEPATCH) += livepatch.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
 obj-$(CONFIG_X86_TSC) += trace_clock.o
-obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
-obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o
+obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
 obj-y += kprobes/
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 49487b488061..2c7aafa70702 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -200,7 +200,7 @@ static void kvm_setup_secondary_clock(void)
  * kind of shutdown from our side, we unregister the clock by writting anything
  * that does not have the 'enable' bit set in the msr
  */
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 static void kvm_crash_shutdown(struct pt_regs *regs)
 {
 	native_write_msr(msr_kvm_system_time, 0, 0);
@@ -259,7 +259,7 @@ void __init kvmclock_init(void)
 	x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
 	x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
 	machine_ops.shutdown = kvm_shutdown;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	machine_ops.crash_shutdown = kvm_crash_shutdown;
 #endif
 	kvm_get_preset_lpj();
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 86db4bcd7ce5..02693dd9a079 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -673,7 +673,7 @@ struct machine_ops machine_ops = {
 	.emergency_restart = native_machine_emergency_restart,
 	.restart = native_machine_restart,
 	.halt = native_machine_halt,
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	.crash_shutdown = native_machine_crash_shutdown,
 #endif
 };
@@ -703,7 +703,7 @@ void machine_halt(void)
 	machine_ops.halt();
 }
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 void machine_crash_shutdown(struct pt_regs *regs)
 {
 	machine_ops.crash_shutdown(regs);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index baadbf90a7c5..fdb7f2a2d328 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -478,7 +478,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)
  * --------- Crashkernel reservation ------------------------------
  */
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 
 /*
  * Keep the crash kernel below this limit. On 32 bits earlier kernels
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 00bf300fd846..74e4bf11f562 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -364,7 +364,7 @@ INIT_PER_CPU(irq_stack_union);
 
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 #include <asm/kexec.h>
 
 . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 148ea2016022..d01986832afc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1264,7 +1264,7 @@ static void vmcs_load(struct vmcs *vmcs)
 		       vmcs, phys_addr);
 }
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 /*
  * This bitmap is used to indicate whether the vmclear
  * operation is enabled on all cpus. All disabled by
@@ -1302,7 +1302,7 @@ static void crash_vmclear_local_loaded_vmcss(void)
 #else
 static inline void crash_enable_local_vmclear(int cpu) { }
 static inline void crash_disable_local_vmclear(int cpu) { }
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
 
 static void __loaded_vmcs_clear(void *arg)
 {
@@ -10411,7 +10411,7 @@ static int __init vmx_init(void)
 	if (r)
 		return r;
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
 			   crash_vmclear_local_loaded_vmcss);
 #endif
@@ -10421,7 +10421,7 @@ static int __init vmx_init(void)
 
 static void __exit vmx_exit(void)
 {
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
 	synchronize_rcu();
 #endif
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index e4308fe6afe8..1db84c0758b7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -650,7 +650,7 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md)
 
 static void __init save_runtime_map(void)
 {
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	efi_memory_desc_t *md;
 	void *tmp, *p, *q = NULL;
 	int count = 0;
@@ -748,7 +748,7 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
 
 static void __init kexec_enter_virtual_mode(void)
 {
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	efi_memory_desc_t *md;
 	void *p;
 
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 020c101c255f..5c9f63fa6abf 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -492,7 +492,7 @@ static void uv_nmi_touch_watchdogs(void)
 	touch_nmi_watchdog();
 }
 
-#if defined(CONFIG_KEXEC)
+#if defined(CONFIG_KEXEC_CORE)
 static atomic_t uv_nmi_kexec_failed;
 static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
 {
@@ -519,13 +519,13 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
 	uv_nmi_sync_exit(0);
 }
 
-#else /* !CONFIG_KEXEC */
+#else /* !CONFIG_KEXEC_CORE */
 static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
 {
 	if (master)
 		pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
 }
-#endif /* !CONFIG_KEXEC */
+#endif /* !CONFIG_KEXEC_CORE */
 
 #ifdef CONFIG_KGDB
 #ifdef CONFIG_KGDB_KDB
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 54071c148340..84533e02fbf8 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -43,7 +43,7 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE
 
 config EFI_RUNTIME_MAP
 	bool "Export efi runtime maps to sysfs"
-	depends on X86 && EFI && KEXEC
+	depends on X86 && EFI && KEXEC_CORE
 	default y
 	help
 	  Export efi runtime memory maps to /sys/firmware/efi/runtime-map.
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 52a880ca1768..dd652f2ae03d 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -467,7 +467,7 @@ static void pci_device_shutdown(struct device *dev)
 	pci_msi_shutdown(pci_dev);
 	pci_msix_shutdown(pci_dev);
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	/*
 	 * If this is a kexec reboot, turn off Bus Master bit on the
 	 * device to tell it to not continue to do DMA. Don't touch
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ab150ade0d18..d140b1e9faa7 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -16,7 +16,7 @@
 
 #include <uapi/linux/kexec.h>
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 #include <linux/list.h>
 #include <linux/linkage.h>
 #include <linux/compat.h>
@@ -329,13 +329,13 @@ int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr,
 int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 					unsigned int relsec);
 
-#else /* !CONFIG_KEXEC */
+#else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
 static inline void crash_kexec(struct pt_regs *regs) { }
 static inline int kexec_should_crash(struct task_struct *p) { return 0; }
 #define kexec_in_progress false
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
 
 #endif /* !defined(__ASSEBMLY__) */
 
diff --git a/init/initramfs.c b/init/initramfs.c
index ad1bd7787bbb..b32ad7d97ac9 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -526,14 +526,14 @@ extern unsigned long __initramfs_size;
 
 static void __init free_initrd(void)
 {
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
 	unsigned long crashk_end = (unsigned long)__va(crashk_res.end);
 #endif
 	if (do_retain_initrd)
 		goto skip;
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	/*
 	 * If the initrd region is overlapped with crashkernel reserved region,
 	 * free only memory that is not part of crashkernel region.
diff --git a/kernel/Makefile b/kernel/Makefile
index 1b4890af5a65..d4988410b410 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_MODULE_SIG) += module_signing.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
+obj-$(CONFIG_KEXEC_CORE) += kexec_core.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e8183895691c..f548f69c4299 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9094,7 +9094,7 @@ static void perf_event_init_cpu(int cpu)
 	mutex_unlock(&swhash->hlist_mutex);
 }
 
-#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
 static void __perf_event_exit_context(void *__info)
 {
 	struct remove_event re = { .detach_group = true };
diff --git a/kernel/kexec.c b/kernel/kexec.c index 2d73ecfa5505..4c5edc357923 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -1,148 +1,23 @@ | |||
1 | /* | 1 | /* |
2 | * kexec.c - kexec system call | 2 | * kexec.c - kexec_load system call |
3 | * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> | 3 | * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> |
4 | * | 4 | * |
5 | * This source code is licensed under the GNU General Public License, | 5 | * This source code is licensed under the GNU General Public License, |
6 | * Version 2. See the file COPYING for more details. | 6 | * Version 2. See the file COPYING for more details. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #define pr_fmt(fmt) "kexec: " fmt | ||
10 | |||
11 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
12 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
13 | #include <linux/file.h> | 11 | #include <linux/file.h> |
14 | #include <linux/slab.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/kexec.h> | 12 | #include <linux/kexec.h> |
17 | #include <linux/mutex.h> | 13 | #include <linux/mutex.h> |
18 | #include <linux/list.h> | 14 | #include <linux/list.h> |
19 | #include <linux/highmem.h> | ||
20 | #include <linux/syscalls.h> | 15 | #include <linux/syscalls.h> |
21 | #include <linux/reboot.h> | ||
22 | #include <linux/ioport.h> | ||
23 | #include <linux/hardirq.h> | ||
24 | #include <linux/elf.h> | ||
25 | #include <linux/elfcore.h> | ||
26 | #include <linux/utsname.h> | ||
27 | #include <linux/numa.h> | ||
28 | #include <linux/suspend.h> | ||
29 | #include <linux/device.h> | ||
30 | #include <linux/freezer.h> | ||
31 | #include <linux/vmalloc.h> | 16 | #include <linux/vmalloc.h> |
32 | #include <linux/pm.h> | 17 | #include <linux/slab.h> |
33 | #include <linux/cpu.h> | ||
34 | #include <linux/console.h> | ||
35 | #include <linux/swap.h> | ||
36 | #include <linux/syscore_ops.h> | ||
37 | #include <linux/compiler.h> | ||
38 | #include <linux/hugetlb.h> | ||
39 | |||
40 | #include <asm/page.h> | ||
41 | #include <asm/uaccess.h> | ||
42 | #include <asm/io.h> | ||
43 | #include <asm/sections.h> | ||
44 | 18 | ||
45 | #include <crypto/hash.h> | ||
46 | #include <crypto/sha.h> | ||
47 | #include "kexec_internal.h" | 19 | #include "kexec_internal.h" |
48 | 20 | ||
49 | DEFINE_MUTEX(kexec_mutex); | ||
50 | |||
51 | /* Per cpu memory for storing cpu states in case of system crash. */ | ||
52 | note_buf_t __percpu *crash_notes; | ||
53 | |||
54 | /* vmcoreinfo stuff */ | ||
55 | static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; | ||
56 | u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; | ||
57 | size_t vmcoreinfo_size; | ||
58 | size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); | ||
59 | |||
60 | /* Flag to indicate we are going to kexec a new kernel */ | ||
61 | bool kexec_in_progress = false; | ||
62 | |||
63 | |||
64 | /* Location of the reserved area for the crash kernel */ | ||
65 | struct resource crashk_res = { | ||
66 | .name = "Crash kernel", | ||
67 | .start = 0, | ||
68 | .end = 0, | ||
69 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
70 | }; | ||
71 | struct resource crashk_low_res = { | ||
72 | .name = "Crash kernel", | ||
73 | .start = 0, | ||
74 | .end = 0, | ||
75 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
76 | }; | ||
77 | |||
78 | int kexec_should_crash(struct task_struct *p) | ||
79 | { | ||
80 | /* | ||
81 | * If crash_kexec_post_notifiers is enabled, don't run | ||
82 | * crash_kexec() here yet, which must be run after panic | ||
83 | * notifiers in panic(). | ||
84 | */ | ||
85 | if (crash_kexec_post_notifiers) | ||
86 | return 0; | ||
87 | /* | ||
88 | * There are 4 panic() calls in do_exit() path, each of which | ||
89 | * corresponds to each of these 4 conditions. | ||
90 | */ | ||
91 | if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) | ||
92 | return 1; | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * When kexec transitions to the new kernel there is a one-to-one | ||
98 | * mapping between physical and virtual addresses. On processors | ||
99 | * where you can disable the MMU this is trivial, and easy. For | ||
100 | * others it is still a simple predictable page table to setup. | ||
101 | * | ||
102 | * In that environment kexec copies the new kernel to its final | ||
103 | * resting place. This means I can only support memory whose | ||
104 | * physical address can fit in an unsigned long. In particular | ||
105 | * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. | ||
106 | * If the assembly stub has more restrictive requirements | ||
107 | * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be | ||
108 | * defined more restrictively in <asm/kexec.h>. | ||
109 | * | ||
110 | * The code for the transition from the current kernel to the | ||
111 | * the new kernel is placed in the control_code_buffer, whose size | ||
112 | * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single | ||
113 | * page of memory is necessary, but some architectures require more. | ||
114 | * Because this memory must be identity mapped in the transition from | ||
115 | * virtual to physical addresses it must live in the range | ||
116 | * 0 - TASK_SIZE, as only the user space mappings are arbitrarily | ||
117 | * modifiable. | ||
118 | * | ||
119 | * The assembly stub in the control code buffer is passed a linked list | ||
120 | * of descriptor pages detailing the source pages of the new kernel, | ||
121 | * and the destination addresses of those source pages. As this data | ||
122 | * structure is not used in the context of the current OS, it must | ||
123 | * be self-contained. | ||
124 | * | ||
125 | * The code has been made to work with highmem pages and will use a | ||
126 | * destination page in its final resting place (if it happens | ||
127 | * to allocate it). The end product of this is that most of the | ||
128 | * physical address space, and most of RAM can be used. | ||
129 | * | ||
130 | * Future directions include: | ||
131 | * - allocating a page table with the control code buffer identity | ||
132 | * mapped, to simplify machine_kexec and make kexec_on_panic more | ||
133 | * reliable. | ||
134 | */ | ||
135 | |||
136 | /* | ||
137 | * KIMAGE_NO_DEST is an impossible destination address..., for | ||
138 | * allocating pages whose destination address we do not care about. | ||
139 | */ | ||
140 | #define KIMAGE_NO_DEST (-1UL) | ||
141 | |||
142 | static struct page *kimage_alloc_page(struct kimage *image, | ||
143 | gfp_t gfp_mask, | ||
144 | unsigned long dest); | ||
145 | |||
146 | static int copy_user_segment_list(struct kimage *image, | 21 | static int copy_user_segment_list(struct kimage *image, |
147 | unsigned long nr_segments, | 22 | unsigned long nr_segments, |
148 | struct kexec_segment __user *segments) | 23 | struct kexec_segment __user *segments) |
@@ -160,123 +35,6 @@ static int copy_user_segment_list(struct kimage *image, | |||
160 | return ret; | 35 | return ret; |
161 | } | 36 | } |
162 | 37 | ||
163 | int sanity_check_segment_list(struct kimage *image) | ||
164 | { | ||
165 | int result, i; | ||
166 | unsigned long nr_segments = image->nr_segments; | ||
167 | |||
168 | /* | ||
169 | * Verify we have good destination addresses. The caller is | ||
170 | * responsible for making certain we don't attempt to load | ||
171 | * the new image into invalid or reserved areas of RAM. This | ||
172 | * just verifies it is an address we can use. | ||
173 | * | ||
174 | * Since the kernel does everything in page size chunks ensure | ||
175 | * the destination addresses are page aligned. Too many | ||
176 | * special cases crop of when we don't do this. The most | ||
177 | * insidious is getting overlapping destination addresses | ||
178 | * simply because addresses are changed to page size | ||
179 | * granularity. | ||
180 | */ | ||
181 | result = -EADDRNOTAVAIL; | ||
182 | for (i = 0; i < nr_segments; i++) { | ||
183 | unsigned long mstart, mend; | ||
184 | |||
185 | mstart = image->segment[i].mem; | ||
186 | mend = mstart + image->segment[i].memsz; | ||
187 | if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) | ||
188 | return result; | ||
189 | if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) | ||
190 | return result; | ||
191 | } | ||
192 | |||
193 | /* Verify our destination addresses do not overlap. | ||
194 | * If we alloed overlapping destination addresses | ||
195 | * through very weird things can happen with no | ||
196 | * easy explanation as one segment stops on another. | ||
197 | */ | ||
198 | result = -EINVAL; | ||
199 | for (i = 0; i < nr_segments; i++) { | ||
200 | unsigned long mstart, mend; | ||
201 | unsigned long j; | ||
202 | |||
203 | mstart = image->segment[i].mem; | ||
204 | mend = mstart + image->segment[i].memsz; | ||
205 | for (j = 0; j < i; j++) { | ||
206 | unsigned long pstart, pend; | ||
207 | pstart = image->segment[j].mem; | ||
208 | pend = pstart + image->segment[j].memsz; | ||
209 | /* Do the segments overlap ? */ | ||
210 | if ((mend > pstart) && (mstart < pend)) | ||
211 | return result; | ||
212 | } | ||
213 | } | ||
214 | |||
215 | /* Ensure our buffer sizes are strictly less than | ||
216 | * our memory sizes. This should always be the case, | ||
217 | * and it is easier to check up front than to be surprised | ||
218 | * later on. | ||
219 | */ | ||
220 | result = -EINVAL; | ||
221 | for (i = 0; i < nr_segments; i++) { | ||
222 | if (image->segment[i].bufsz > image->segment[i].memsz) | ||
223 | return result; | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * Verify we have good destination addresses. Normally | ||
228 | * the caller is responsible for making certain we don't | ||
229 | * attempt to load the new image into invalid or reserved | ||
230 | * areas of RAM. But crash kernels are preloaded into a | ||
231 | * reserved area of ram. We must ensure the addresses | ||
232 | * are in the reserved area otherwise preloading the | ||
233 | * kernel could corrupt things. | ||
234 | */ | ||
235 | |||
236 | if (image->type == KEXEC_TYPE_CRASH) { | ||
237 | result = -EADDRNOTAVAIL; | ||
238 | for (i = 0; i < nr_segments; i++) { | ||
239 | unsigned long mstart, mend; | ||
240 | |||
241 | mstart = image->segment[i].mem; | ||
242 | mend = mstart + image->segment[i].memsz - 1; | ||
243 | /* Ensure we are within the crash kernel limits */ | ||
244 | if ((mstart < crashk_res.start) || | ||
245 | (mend > crashk_res.end)) | ||
246 | return result; | ||
247 | } | ||
248 | } | ||
249 | |||
250 | return 0; | ||
251 | } | ||
252 | |||
253 | struct kimage *do_kimage_alloc_init(void) | ||
254 | { | ||
255 | struct kimage *image; | ||
256 | |||
257 | /* Allocate a controlling structure */ | ||
258 | image = kzalloc(sizeof(*image), GFP_KERNEL); | ||
259 | if (!image) | ||
260 | return NULL; | ||
261 | |||
262 | image->head = 0; | ||
263 | image->entry = &image->head; | ||
264 | image->last_entry = &image->head; | ||
265 | image->control_page = ~0; /* By default this does not apply */ | ||
266 | image->type = KEXEC_TYPE_DEFAULT; | ||
267 | |||
268 | /* Initialize the list of control pages */ | ||
269 | INIT_LIST_HEAD(&image->control_pages); | ||
270 | |||
271 | /* Initialize the list of destination pages */ | ||
272 | INIT_LIST_HEAD(&image->dest_pages); | ||
273 | |||
274 | /* Initialize the list of unusable pages */ | ||
275 | INIT_LIST_HEAD(&image->unusable_pages); | ||
276 | |||
277 | return image; | ||
278 | } | ||
279 | |||
280 | static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, | 38 | static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, |
281 | unsigned long nr_segments, | 39 | unsigned long nr_segments, |
282 | struct kexec_segment __user *segments, | 40 | struct kexec_segment __user *segments, |
@@ -343,597 +101,6 @@ out_free_image: | |||
343 | return ret; | 101 | return ret; |
344 | } | 102 | } |
345 | 103 | ||
346 | int kimage_is_destination_range(struct kimage *image, | ||
347 | unsigned long start, | ||
348 | unsigned long end) | ||
349 | { | ||
350 | unsigned long i; | ||
351 | |||
352 | for (i = 0; i < image->nr_segments; i++) { | ||
353 | unsigned long mstart, mend; | ||
354 | |||
355 | mstart = image->segment[i].mem; | ||
356 | mend = mstart + image->segment[i].memsz; | ||
357 | if ((end > mstart) && (start < mend)) | ||
358 | return 1; | ||
359 | } | ||
360 | |||
361 | return 0; | ||
362 | } | ||
363 | |||
364 | static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) | ||
365 | { | ||
366 | struct page *pages; | ||
367 | |||
368 | pages = alloc_pages(gfp_mask, order); | ||
369 | if (pages) { | ||
370 | unsigned int count, i; | ||
371 | pages->mapping = NULL; | ||
372 | set_page_private(pages, order); | ||
373 | count = 1 << order; | ||
374 | for (i = 0; i < count; i++) | ||
375 | SetPageReserved(pages + i); | ||
376 | } | ||
377 | |||
378 | return pages; | ||
379 | } | ||
380 | |||
381 | static void kimage_free_pages(struct page *page) | ||
382 | { | ||
383 | unsigned int order, count, i; | ||
384 | |||
385 | order = page_private(page); | ||
386 | count = 1 << order; | ||
387 | for (i = 0; i < count; i++) | ||
388 | ClearPageReserved(page + i); | ||
389 | __free_pages(page, order); | ||
390 | } | ||
391 | |||
392 | void kimage_free_page_list(struct list_head *list) | ||
393 | { | ||
394 | struct list_head *pos, *next; | ||
395 | |||
396 | list_for_each_safe(pos, next, list) { | ||
397 | struct page *page; | ||
398 | |||
399 | page = list_entry(pos, struct page, lru); | ||
400 | list_del(&page->lru); | ||
401 | kimage_free_pages(page); | ||
402 | } | ||
403 | } | ||
404 | |||
405 | static struct page *kimage_alloc_normal_control_pages(struct kimage *image, | ||
406 | unsigned int order) | ||
407 | { | ||
408 | /* Control pages are special, they are the intermediaries | ||
409 | * that are needed while we copy the rest of the pages | ||
410 | * to their final resting place. As such they must | ||
411 | * not conflict with either the destination addresses | ||
412 | * or memory the kernel is already using. | ||
413 | * | ||
414 | * The only case where we really need more than one of | ||
415 | * these are for architectures where we cannot disable | ||
416 | * the MMU and must instead generate an identity mapped | ||
417 | * page table for all of the memory. | ||
418 | * | ||
419 | * At worst this runs in O(N) of the image size. | ||
420 | */ | ||
421 | struct list_head extra_pages; | ||
422 | struct page *pages; | ||
423 | unsigned int count; | ||
424 | |||
425 | count = 1 << order; | ||
426 | INIT_LIST_HEAD(&extra_pages); | ||
427 | |||
428 | /* Loop while I can allocate a page and the page allocated | ||
429 | * is a destination page. | ||
430 | */ | ||
431 | do { | ||
432 | unsigned long pfn, epfn, addr, eaddr; | ||
433 | |||
434 | pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); | ||
435 | if (!pages) | ||
436 | break; | ||
437 | pfn = page_to_pfn(pages); | ||
438 | epfn = pfn + count; | ||
439 | addr = pfn << PAGE_SHIFT; | ||
440 | eaddr = epfn << PAGE_SHIFT; | ||
441 | if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || | ||
442 | kimage_is_destination_range(image, addr, eaddr)) { | ||
443 | list_add(&pages->lru, &extra_pages); | ||
444 | pages = NULL; | ||
445 | } | ||
446 | } while (!pages); | ||
447 | |||
448 | if (pages) { | ||
449 | /* Remember the allocated page... */ | ||
450 | list_add(&pages->lru, &image->control_pages); | ||
451 | |||
452 | /* Because the page is already in it's destination | ||
453 | * location we will never allocate another page at | ||
454 | * that address. Therefore kimage_alloc_pages | ||
455 | * will not return it (again) and we don't need | ||
456 | * to give it an entry in image->segment[]. | ||
457 | */ | ||
458 | } | ||
459 | /* Deal with the destination pages I have inadvertently allocated. | ||
460 | * | ||
461 | * Ideally I would convert multi-page allocations into single | ||
462 | * page allocations, and add everything to image->dest_pages. | ||
463 | * | ||
464 | * For now it is simpler to just free the pages. | ||
465 | */ | ||
466 | kimage_free_page_list(&extra_pages); | ||
467 | |||
468 | return pages; | ||
469 | } | ||
470 | |||
471 | static struct page *kimage_alloc_crash_control_pages(struct kimage *image, | ||
472 | unsigned int order) | ||
473 | { | ||
474 | /* Control pages are special, they are the intermediaries | ||
475 | * that are needed while we copy the rest of the pages | ||
476 | * to their final resting place. As such they must | ||
477 | * not conflict with either the destination addresses | ||
478 | * or memory the kernel is already using. | ||
479 | * | ||
480 | * Control pages are also the only pags we must allocate | ||
481 | * when loading a crash kernel. All of the other pages | ||
482 | * are specified by the segments and we just memcpy | ||
483 | * into them directly. | ||
484 | * | ||
485 | * The only case where we really need more than one of | ||
486 | * these are for architectures where we cannot disable | ||
487 | * the MMU and must instead generate an identity mapped | ||
488 | * page table for all of the memory. | ||
489 | * | ||
490 | * Given the low demand this implements a very simple | ||
491 | * allocator that finds the first hole of the appropriate | ||
492 | * size in the reserved memory region, and allocates all | ||
493 | * of the memory up to and including the hole. | ||
494 | */ | ||
495 | unsigned long hole_start, hole_end, size; | ||
496 | struct page *pages; | ||
497 | |||
498 | pages = NULL; | ||
499 | size = (1 << order) << PAGE_SHIFT; | ||
500 | hole_start = (image->control_page + (size - 1)) & ~(size - 1); | ||
501 | hole_end = hole_start + size - 1; | ||
502 | while (hole_end <= crashk_res.end) { | ||
503 | unsigned long i; | ||
504 | |||
505 | if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) | ||
506 | break; | ||
507 | /* See if I overlap any of the segments */ | ||
508 | for (i = 0; i < image->nr_segments; i++) { | ||
509 | unsigned long mstart, mend; | ||
510 | |||
511 | mstart = image->segment[i].mem; | ||
512 | mend = mstart + image->segment[i].memsz - 1; | ||
513 | if ((hole_end >= mstart) && (hole_start <= mend)) { | ||
514 | /* Advance the hole to the end of the segment */ | ||
515 | hole_start = (mend + (size - 1)) & ~(size - 1); | ||
516 | hole_end = hole_start + size - 1; | ||
517 | break; | ||
518 | } | ||
519 | } | ||
520 | /* If I don't overlap any segments I have found my hole! */ | ||
521 | if (i == image->nr_segments) { | ||
522 | pages = pfn_to_page(hole_start >> PAGE_SHIFT); | ||
523 | break; | ||
524 | } | ||
525 | } | ||
526 | if (pages) | ||
527 | image->control_page = hole_end; | ||
528 | |||
529 | return pages; | ||
530 | } | ||
531 | |||
532 | |||
533 | struct page *kimage_alloc_control_pages(struct kimage *image, | ||
534 | unsigned int order) | ||
535 | { | ||
536 | struct page *pages = NULL; | ||
537 | |||
538 | switch (image->type) { | ||
539 | case KEXEC_TYPE_DEFAULT: | ||
540 | pages = kimage_alloc_normal_control_pages(image, order); | ||
541 | break; | ||
542 | case KEXEC_TYPE_CRASH: | ||
543 | pages = kimage_alloc_crash_control_pages(image, order); | ||
544 | break; | ||
545 | } | ||
546 | |||
547 | return pages; | ||
548 | } | ||
549 | |||
550 | static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) | ||
551 | { | ||
552 | if (*image->entry != 0) | ||
553 | image->entry++; | ||
554 | |||
555 | if (image->entry == image->last_entry) { | ||
556 | kimage_entry_t *ind_page; | ||
557 | struct page *page; | ||
558 | |||
559 | page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); | ||
560 | if (!page) | ||
561 | return -ENOMEM; | ||
562 | |||
563 | ind_page = page_address(page); | ||
564 | *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; | ||
565 | image->entry = ind_page; | ||
566 | image->last_entry = ind_page + | ||
567 | ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); | ||
568 | } | ||
569 | *image->entry = entry; | ||
570 | image->entry++; | ||
571 | *image->entry = 0; | ||
572 | |||
573 | return 0; | ||
574 | } | ||
575 | |||
576 | static int kimage_set_destination(struct kimage *image, | ||
577 | unsigned long destination) | ||
578 | { | ||
579 | int result; | ||
580 | |||
581 | destination &= PAGE_MASK; | ||
582 | result = kimage_add_entry(image, destination | IND_DESTINATION); | ||
583 | |||
584 | return result; | ||
585 | } | ||
586 | |||
587 | |||
588 | static int kimage_add_page(struct kimage *image, unsigned long page) | ||
589 | { | ||
590 | int result; | ||
591 | |||
592 | page &= PAGE_MASK; | ||
593 | result = kimage_add_entry(image, page | IND_SOURCE); | ||
594 | |||
595 | return result; | ||
596 | } | ||
597 | |||
598 | |||
599 | static void kimage_free_extra_pages(struct kimage *image) | ||
600 | { | ||
601 | /* Walk through and free any extra destination pages I may have */ | ||
602 | kimage_free_page_list(&image->dest_pages); | ||
603 | |||
604 | /* Walk through and free any unusable pages I have cached */ | ||
605 | kimage_free_page_list(&image->unusable_pages); | ||
606 | |||
607 | } | ||
608 | void kimage_terminate(struct kimage *image) | ||
609 | { | ||
610 | if (*image->entry != 0) | ||
611 | image->entry++; | ||
612 | |||
613 | *image->entry = IND_DONE; | ||
614 | } | ||
615 | |||
616 | #define for_each_kimage_entry(image, ptr, entry) \ | ||
617 | for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ | ||
618 | ptr = (entry & IND_INDIRECTION) ? \ | ||
619 | phys_to_virt((entry & PAGE_MASK)) : ptr + 1) | ||
620 | |||
621 | static void kimage_free_entry(kimage_entry_t entry) | ||
622 | { | ||
623 | struct page *page; | ||
624 | |||
625 | page = pfn_to_page(entry >> PAGE_SHIFT); | ||
626 | kimage_free_pages(page); | ||
627 | } | ||
628 | |||
629 | void kimage_free(struct kimage *image) | ||
630 | { | ||
631 | kimage_entry_t *ptr, entry; | ||
632 | kimage_entry_t ind = 0; | ||
633 | |||
634 | if (!image) | ||
635 | return; | ||
636 | |||
637 | kimage_free_extra_pages(image); | ||
638 | for_each_kimage_entry(image, ptr, entry) { | ||
639 | if (entry & IND_INDIRECTION) { | ||
640 | /* Free the previous indirection page */ | ||
641 | if (ind & IND_INDIRECTION) | ||
642 | kimage_free_entry(ind); | ||
643 | /* Save this indirection page until we are | ||
644 | * done with it. | ||
645 | */ | ||
646 | ind = entry; | ||
647 | } else if (entry & IND_SOURCE) | ||
648 | kimage_free_entry(entry); | ||
649 | } | ||
650 | /* Free the final indirection page */ | ||
651 | if (ind & IND_INDIRECTION) | ||
652 | kimage_free_entry(ind); | ||
653 | |||
654 | /* Handle any machine specific cleanup */ | ||
655 | machine_kexec_cleanup(image); | ||
656 | |||
657 | /* Free the kexec control pages... */ | ||
658 | kimage_free_page_list(&image->control_pages); | ||
659 | |||
660 | /* | ||
661 | * Free up any temporary buffers allocated. This might hit if | ||
662 | * error occurred much later after buffer allocation. | ||
663 | */ | ||
664 | if (image->file_mode) | ||
665 | kimage_file_post_load_cleanup(image); | ||
666 | |||
667 | kfree(image); | ||
668 | } | ||
669 | |||
670 | static kimage_entry_t *kimage_dst_used(struct kimage *image, | ||
671 | unsigned long page) | ||
672 | { | ||
673 | kimage_entry_t *ptr, entry; | ||
674 | unsigned long destination = 0; | ||
675 | |||
676 | for_each_kimage_entry(image, ptr, entry) { | ||
677 | if (entry & IND_DESTINATION) | ||
678 | destination = entry & PAGE_MASK; | ||
679 | else if (entry & IND_SOURCE) { | ||
680 | if (page == destination) | ||
681 | return ptr; | ||
682 | destination += PAGE_SIZE; | ||
683 | } | ||
684 | } | ||
685 | |||
686 | return NULL; | ||
687 | } | ||
688 | |||
689 | static struct page *kimage_alloc_page(struct kimage *image, | ||
690 | gfp_t gfp_mask, | ||
691 | unsigned long destination) | ||
692 | { | ||
693 | /* | ||
694 | * Here we implement safeguards to ensure that a source page | ||
695 | * is not copied to its destination page before the data on | ||
696 | * the destination page is no longer useful. | ||
697 | * | ||
698 | * To do this we maintain the invariant that a source page is | ||
699 | * either its own destination page, or it is not a | ||
700 | * destination page at all. | ||
701 | * | ||
702 | * That is slightly stronger than required, but the proof | ||
703 | * that no problems will not occur is trivial, and the | ||
704 | * implementation is simply to verify. | ||
705 | * | ||
706 | * When allocating all pages normally this algorithm will run | ||
707 | * in O(N) time, but in the worst case it will run in O(N^2) | ||
708 | * time. If the runtime is a problem the data structures can | ||
709 | * be fixed. | ||
710 | */ | ||
711 | struct page *page; | ||
712 | unsigned long addr; | ||
713 | |||
714 | /* | ||
715 | * Walk through the list of destination pages, and see if I | ||
716 | * have a match. | ||
717 | */ | ||
718 | list_for_each_entry(page, &image->dest_pages, lru) { | ||
719 | addr = page_to_pfn(page) << PAGE_SHIFT; | ||
720 | if (addr == destination) { | ||
721 | list_del(&page->lru); | ||
722 | return page; | ||
723 | } | ||
724 | } | ||
725 | page = NULL; | ||
726 | while (1) { | ||
727 | kimage_entry_t *old; | ||
728 | |||
729 | /* Allocate a page, if we run out of memory give up */ | ||
730 | page = kimage_alloc_pages(gfp_mask, 0); | ||
731 | if (!page) | ||
732 | return NULL; | ||
733 | /* If the page cannot be used file it away */ | ||
734 | if (page_to_pfn(page) > | ||
735 | (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { | ||
736 | list_add(&page->lru, &image->unusable_pages); | ||
737 | continue; | ||
738 | } | ||
739 | addr = page_to_pfn(page) << PAGE_SHIFT; | ||
740 | |||
741 | /* If it is the destination page we want use it */ | ||
742 | if (addr == destination) | ||
743 | break; | ||
744 | |||
745 | /* If the page is not a destination page use it */ | ||
746 | if (!kimage_is_destination_range(image, addr, | ||
747 | addr + PAGE_SIZE)) | ||
748 | break; | ||
749 | |||
750 | /* | ||
751 | * I know that the page is someones destination page. | ||
752 | * See if there is already a source page for this | ||
753 | * destination page. And if so swap the source pages. | ||
754 | */ | ||
755 | old = kimage_dst_used(image, addr); | ||
756 | if (old) { | ||
757 | /* If so move it */ | ||
758 | unsigned long old_addr; | ||
759 | struct page *old_page; | ||
760 | |||
761 | old_addr = *old & PAGE_MASK; | ||
762 | old_page = pfn_to_page(old_addr >> PAGE_SHIFT); | ||
763 | copy_highpage(page, old_page); | ||
764 | *old = addr | (*old & ~PAGE_MASK); | ||
765 | |||
766 | /* The old page I have found cannot be a | ||
767 | * destination page, so return it if it's | ||
768 | * gfp_flags honor the ones passed in. | ||
769 | */ | ||
770 | if (!(gfp_mask & __GFP_HIGHMEM) && | ||
771 | PageHighMem(old_page)) { | ||
772 | kimage_free_pages(old_page); | ||
773 | continue; | ||
774 | } | ||
775 | addr = old_addr; | ||
776 | page = old_page; | ||
777 | break; | ||
778 | } else { | ||
779 | /* Place the page on the destination list I | ||
780 | * will use it later. | ||
781 | */ | ||
782 | list_add(&page->lru, &image->dest_pages); | ||
783 | } | ||
784 | } | ||
785 | |||
786 | return page; | ||
787 | } | ||
788 | |||
789 | static int kimage_load_normal_segment(struct kimage *image, | ||
790 | struct kexec_segment *segment) | ||
791 | { | ||
792 | unsigned long maddr; | ||
793 | size_t ubytes, mbytes; | ||
794 | int result; | ||
795 | unsigned char __user *buf = NULL; | ||
796 | unsigned char *kbuf = NULL; | ||
797 | |||
798 | result = 0; | ||
799 | if (image->file_mode) | ||
800 | kbuf = segment->kbuf; | ||
801 | else | ||
802 | buf = segment->buf; | ||
803 | ubytes = segment->bufsz; | ||
804 | mbytes = segment->memsz; | ||
805 | maddr = segment->mem; | ||
806 | |||
807 | result = kimage_set_destination(image, maddr); | ||
808 | if (result < 0) | ||
809 | goto out; | ||
810 | |||
811 | while (mbytes) { | ||
812 | struct page *page; | ||
813 | char *ptr; | ||
814 | size_t uchunk, mchunk; | ||
815 | |||
816 | page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); | ||
817 | if (!page) { | ||
818 | result = -ENOMEM; | ||
819 | goto out; | ||
820 | } | ||
821 | result = kimage_add_page(image, page_to_pfn(page) | ||
822 | << PAGE_SHIFT); | ||
823 | if (result < 0) | ||
824 | goto out; | ||
825 | |||
826 | ptr = kmap(page); | ||
827 | /* Start with a clear page */ | ||
828 | clear_page(ptr); | ||
829 | ptr += maddr & ~PAGE_MASK; | ||
830 | mchunk = min_t(size_t, mbytes, | ||
831 | PAGE_SIZE - (maddr & ~PAGE_MASK)); | ||
832 | uchunk = min(ubytes, mchunk); | ||
833 | |||
834 | /* For file based kexec, source pages are in kernel memory */ | ||
835 | if (image->file_mode) | ||
836 | memcpy(ptr, kbuf, uchunk); | ||
837 | else | ||
838 | result = copy_from_user(ptr, buf, uchunk); | ||
839 | kunmap(page); | ||
840 | if (result) { | ||
841 | result = -EFAULT; | ||
842 | goto out; | ||
843 | } | ||
844 | ubytes -= uchunk; | ||
845 | maddr += mchunk; | ||
846 | if (image->file_mode) | ||
847 | kbuf += mchunk; | ||
848 | else | ||
849 | buf += mchunk; | ||
850 | mbytes -= mchunk; | ||
851 | } | ||
852 | out: | ||
853 | return result; | ||
854 | } | ||
855 | |||
856 | static int kimage_load_crash_segment(struct kimage *image, | ||
857 | struct kexec_segment *segment) | ||
858 | { | ||
859 | /* For crash dumps kernels we simply copy the data from | ||
860 | * user space to it's destination. | ||
861 | * We do things a page at a time for the sake of kmap. | ||
862 | */ | ||
863 | unsigned long maddr; | ||
864 | size_t ubytes, mbytes; | ||
865 | int result; | ||
866 | unsigned char __user *buf = NULL; | ||
867 | unsigned char *kbuf = NULL; | ||
868 | |||
869 | result = 0; | ||
870 | if (image->file_mode) | ||
871 | kbuf = segment->kbuf; | ||
872 | else | ||
873 | buf = segment->buf; | ||
874 | ubytes = segment->bufsz; | ||
875 | mbytes = segment->memsz; | ||
876 | maddr = segment->mem; | ||
877 | while (mbytes) { | ||
878 | struct page *page; | ||
879 | char *ptr; | ||
880 | size_t uchunk, mchunk; | ||
881 | |||
882 | page = pfn_to_page(maddr >> PAGE_SHIFT); | ||
883 | if (!page) { | ||
884 | result = -ENOMEM; | ||
885 | goto out; | ||
886 | } | ||
887 | ptr = kmap(page); | ||
888 | ptr += maddr & ~PAGE_MASK; | ||
889 | mchunk = min_t(size_t, mbytes, | ||
890 | PAGE_SIZE - (maddr & ~PAGE_MASK)); | ||
891 | uchunk = min(ubytes, mchunk); | ||
892 | if (mchunk > uchunk) { | ||
893 | /* Zero the trailing part of the page */ | ||
894 | memset(ptr + uchunk, 0, mchunk - uchunk); | ||
895 | } | ||
896 | |||
897 | /* For file based kexec, source pages are in kernel memory */ | ||
898 | if (image->file_mode) | ||
899 | memcpy(ptr, kbuf, uchunk); | ||
900 | else | ||
901 | result = copy_from_user(ptr, buf, uchunk); | ||
902 | kexec_flush_icache_page(page); | ||
903 | kunmap(page); | ||
904 | if (result) { | ||
905 | result = -EFAULT; | ||
906 | goto out; | ||
907 | } | ||
908 | ubytes -= uchunk; | ||
909 | maddr += mchunk; | ||
910 | if (image->file_mode) | ||
911 | kbuf += mchunk; | ||
912 | else | ||
913 | buf += mchunk; | ||
914 | mbytes -= mchunk; | ||
915 | } | ||
916 | out: | ||
917 | return result; | ||
918 | } | ||
919 | |||
920 | int kimage_load_segment(struct kimage *image, | ||
921 | struct kexec_segment *segment) | ||
922 | { | ||
923 | int result = -ENOMEM; | ||
924 | |||
925 | switch (image->type) { | ||
926 | case KEXEC_TYPE_DEFAULT: | ||
927 | result = kimage_load_normal_segment(image, segment); | ||
928 | break; | ||
929 | case KEXEC_TYPE_CRASH: | ||
930 | result = kimage_load_crash_segment(image, segment); | ||
931 | break; | ||
932 | } | ||
933 | |||
934 | return result; | ||
935 | } | ||
936 | |||
937 | /* | 104 | /* |
938 | * Exec Kernel system call: for obvious reasons only root may call it. | 105 | * Exec Kernel system call: for obvious reasons only root may call it. |
939 | * | 106 | * |
@@ -954,9 +121,6 @@ int kimage_load_segment(struct kimage *image, | |||
954 | * kexec does not sync or unmount filesystems, so if you need | 121 | * kexec does not sync or unmount filesystems, so if you need |
955 | * that to happen you need to do that yourself. | 122 | * that to happen you need to do that yourself. |
956 | */ | 123 | */ |
957 | struct kimage *kexec_image; | ||
958 | struct kimage *kexec_crash_image; | ||
959 | int kexec_load_disabled; | ||
960 | 124 | ||
961 | SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, | 125 | SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, |
962 | struct kexec_segment __user *, segments, unsigned long, flags) | 126 | struct kexec_segment __user *, segments, unsigned long, flags) |
@@ -1051,18 +215,6 @@ out: | |||
1051 | return result; | 215 | return result; |
1052 | } | 216 | } |
1053 | 217 | ||
1054 | /* | ||
1055 | * Add and remove page tables for crashkernel memory | ||
1056 | * | ||
1057 | * Provide an empty default implementation here -- architecture | ||
1058 | * code may override this | ||
1059 | */ | ||
1060 | void __weak crash_map_reserved_pages(void) | ||
1061 | {} | ||
1062 | |||
1063 | void __weak crash_unmap_reserved_pages(void) | ||
1064 | {} | ||
1065 | |||
1066 | #ifdef CONFIG_COMPAT | 218 | #ifdef CONFIG_COMPAT |
1067 | COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, | 219 | COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, |
1068 | compat_ulong_t, nr_segments, | 220 | compat_ulong_t, nr_segments, |
@@ -1101,646 +253,3 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, | |||
1101 | return sys_kexec_load(entry, nr_segments, ksegments, flags); | 253 | return sys_kexec_load(entry, nr_segments, ksegments, flags); |
1102 | } | 254 | } |
1103 | #endif | 255 | #endif |
1104 | |||
1105 | void crash_kexec(struct pt_regs *regs) | ||
1106 | { | ||
1107 | /* Take the kexec_mutex here to prevent sys_kexec_load | ||
1108 | * running on one cpu from replacing the crash kernel | ||
1109 | * we are using after a panic on a different cpu. | ||
1110 | * | ||
1111 | * If the crash kernel was not located in a fixed area | ||
1112 | * of memory the xchg(&kexec_crash_image) would be | ||
1113 | * sufficient. But since I reuse the memory... | ||
1114 | */ | ||
1115 | if (mutex_trylock(&kexec_mutex)) { | ||
1116 | if (kexec_crash_image) { | ||
1117 | struct pt_regs fixed_regs; | ||
1118 | |||
1119 | crash_setup_regs(&fixed_regs, regs); | ||
1120 | crash_save_vmcoreinfo(); | ||
1121 | machine_crash_shutdown(&fixed_regs); | ||
1122 | machine_kexec(kexec_crash_image); | ||
1123 | } | ||
1124 | mutex_unlock(&kexec_mutex); | ||
1125 | } | ||
1126 | } | ||
1127 | |||
1128 | size_t crash_get_memory_size(void) | ||
1129 | { | ||
1130 | size_t size = 0; | ||
1131 | mutex_lock(&kexec_mutex); | ||
1132 | if (crashk_res.end != crashk_res.start) | ||
1133 | size = resource_size(&crashk_res); | ||
1134 | mutex_unlock(&kexec_mutex); | ||
1135 | return size; | ||
1136 | } | ||
1137 | |||
1138 | void __weak crash_free_reserved_phys_range(unsigned long begin, | ||
1139 | unsigned long end) | ||
1140 | { | ||
1141 | unsigned long addr; | ||
1142 | |||
1143 | for (addr = begin; addr < end; addr += PAGE_SIZE) | ||
1144 | free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); | ||
1145 | } | ||
1146 | |||
1147 | int crash_shrink_memory(unsigned long new_size) | ||
1148 | { | ||
1149 | int ret = 0; | ||
1150 | unsigned long start, end; | ||
1151 | unsigned long old_size; | ||
1152 | struct resource *ram_res; | ||
1153 | |||
1154 | mutex_lock(&kexec_mutex); | ||
1155 | |||
1156 | if (kexec_crash_image) { | ||
1157 | ret = -ENOENT; | ||
1158 | goto unlock; | ||
1159 | } | ||
1160 | start = crashk_res.start; | ||
1161 | end = crashk_res.end; | ||
1162 | old_size = (end == 0) ? 0 : end - start + 1; | ||
1163 | if (new_size >= old_size) { | ||
1164 | ret = (new_size == old_size) ? 0 : -EINVAL; | ||
1165 | goto unlock; | ||
1166 | } | ||
1167 | |||
1168 | ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); | ||
1169 | if (!ram_res) { | ||
1170 | ret = -ENOMEM; | ||
1171 | goto unlock; | ||
1172 | } | ||
1173 | |||
1174 | start = roundup(start, KEXEC_CRASH_MEM_ALIGN); | ||
1175 | end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); | ||
1176 | |||
1177 | crash_map_reserved_pages(); | ||
1178 | crash_free_reserved_phys_range(end, crashk_res.end); | ||
1179 | |||
1180 | if ((start == end) && (crashk_res.parent != NULL)) | ||
1181 | release_resource(&crashk_res); | ||
1182 | |||
1183 | ram_res->start = end; | ||
1184 | ram_res->end = crashk_res.end; | ||
1185 | ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; | ||
1186 | ram_res->name = "System RAM"; | ||
1187 | |||
1188 | crashk_res.end = end - 1; | ||
1189 | |||
1190 | insert_resource(&iomem_resource, ram_res); | ||
1191 | crash_unmap_reserved_pages(); | ||
1192 | |||
1193 | unlock: | ||
1194 | mutex_unlock(&kexec_mutex); | ||
1195 | return ret; | ||
1196 | } | ||
1197 | |||
1198 | static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, | ||
1199 | size_t data_len) | ||
1200 | { | ||
1201 | struct elf_note note; | ||
1202 | |||
1203 | note.n_namesz = strlen(name) + 1; | ||
1204 | note.n_descsz = data_len; | ||
1205 | note.n_type = type; | ||
1206 | memcpy(buf, ¬e, sizeof(note)); | ||
1207 | buf += (sizeof(note) + 3)/4; | ||
1208 | memcpy(buf, name, note.n_namesz); | ||
1209 | buf += (note.n_namesz + 3)/4; | ||
1210 | memcpy(buf, data, note.n_descsz); | ||
1211 | buf += (note.n_descsz + 3)/4; | ||
1212 | |||
1213 | return buf; | ||
1214 | } | ||
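A quick worked example of the 4-byte rounding above (my illustration, not part of the patch): sizeof(struct elf_note) is 12 bytes, i.e. three u32 words, and for the name "CORE" n_namesz is 5, so (5 + 3)/4 rounds the name up to two words. A call such as append_elf_note(buf, "CORE", NT_PRSTATUS, data, 12) would therefore lay the buffer out as:

    /*
     * words 0..2 : note header { n_namesz = 5, n_descsz = 12, n_type = NT_PRSTATUS }
     * words 3..4 : "CORE\0" plus three padding bytes   ((5 + 3)/4 == 2 words)
     * words 5..7 : 12 bytes of descriptor data         ((12 + 3)/4 == 3 words)
     */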
1215 | |||
1216 | static void final_note(u32 *buf) | ||
1217 | { | ||
1218 | struct elf_note note; | ||
1219 | |||
1220 | note.n_namesz = 0; | ||
1221 | note.n_descsz = 0; | ||
1222 | note.n_type = 0; | ||
1223 | memcpy(buf, ¬e, sizeof(note)); | ||
1224 | } | ||
1225 | |||
1226 | void crash_save_cpu(struct pt_regs *regs, int cpu) | ||
1227 | { | ||
1228 | struct elf_prstatus prstatus; | ||
1229 | u32 *buf; | ||
1230 | |||
1231 | if ((cpu < 0) || (cpu >= nr_cpu_ids)) | ||
1232 | return; | ||
1233 | |||
1234 | /* Using ELF notes here is opportunistic. | ||
1235 | * I need a well defined structure format | ||
1236 | * for the data I pass, and I need tags | ||
1237 | * on the data to indicate what information I have | ||
1238 | * squirrelled away. ELF notes happen to provide | ||
1239 | * all of that, so there is no need to invent something new. | ||
1240 | */ | ||
1241 | buf = (u32 *)per_cpu_ptr(crash_notes, cpu); | ||
1242 | if (!buf) | ||
1243 | return; | ||
1244 | memset(&prstatus, 0, sizeof(prstatus)); | ||
1245 | prstatus.pr_pid = current->pid; | ||
1246 | elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); | ||
1247 | buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, | ||
1248 | &prstatus, sizeof(prstatus)); | ||
1249 | final_note(buf); | ||
1250 | } | ||
1251 | |||
1252 | static int __init crash_notes_memory_init(void) | ||
1253 | { | ||
1254 | /* Allocate memory for saving cpu registers. */ | ||
1255 | crash_notes = alloc_percpu(note_buf_t); | ||
1256 | if (!crash_notes) { | ||
1257 | pr_warn("Kexec: Memory allocation for saving cpu register states failed\n"); | ||
1258 | return -ENOMEM; | ||
1259 | } | ||
1260 | return 0; | ||
1261 | } | ||
1262 | subsys_initcall(crash_notes_memory_init); | ||
1263 | |||
1264 | |||
1265 | /* | ||
1266 | * parsing the "crashkernel" commandline | ||
1267 | * | ||
1268 | * this code is intended to be called from architecture specific code | ||
1269 | */ | ||
1270 | |||
1271 | |||
1272 | /* | ||
1273 | * This function parses command lines in the format | ||
1274 | * | ||
1275 | * crashkernel=ramsize-range:size[,...][@offset] | ||
1276 | * | ||
1277 | * The function returns 0 on success and -EINVAL on failure. | ||
1278 | */ | ||
1279 | static int __init parse_crashkernel_mem(char *cmdline, | ||
1280 | unsigned long long system_ram, | ||
1281 | unsigned long long *crash_size, | ||
1282 | unsigned long long *crash_base) | ||
1283 | { | ||
1284 | char *cur = cmdline, *tmp; | ||
1285 | |||
1286 | /* for each entry of the comma-separated list */ | ||
1287 | do { | ||
1288 | unsigned long long start, end = ULLONG_MAX, size; | ||
1289 | |||
1290 | /* get the start of the range */ | ||
1291 | start = memparse(cur, &tmp); | ||
1292 | if (cur == tmp) { | ||
1293 | pr_warn("crashkernel: Memory value expected\n"); | ||
1294 | return -EINVAL; | ||
1295 | } | ||
1296 | cur = tmp; | ||
1297 | if (*cur != '-') { | ||
1298 | pr_warn("crashkernel: '-' expected\n"); | ||
1299 | return -EINVAL; | ||
1300 | } | ||
1301 | cur++; | ||
1302 | |||
1303 | /* if no ':' is here, then we read the end */ | ||
1304 | if (*cur != ':') { | ||
1305 | end = memparse(cur, &tmp); | ||
1306 | if (cur == tmp) { | ||
1307 | pr_warn("crashkernel: Memory value expected\n"); | ||
1308 | return -EINVAL; | ||
1309 | } | ||
1310 | cur = tmp; | ||
1311 | if (end <= start) { | ||
1312 | pr_warn("crashkernel: end <= start\n"); | ||
1313 | return -EINVAL; | ||
1314 | } | ||
1315 | } | ||
1316 | |||
1317 | if (*cur != ':') { | ||
1318 | pr_warn("crashkernel: ':' expected\n"); | ||
1319 | return -EINVAL; | ||
1320 | } | ||
1321 | cur++; | ||
1322 | |||
1323 | size = memparse(cur, &tmp); | ||
1324 | if (cur == tmp) { | ||
1325 | pr_warn("Memory value expected\n"); | ||
1326 | return -EINVAL; | ||
1327 | } | ||
1328 | cur = tmp; | ||
1329 | if (size >= system_ram) { | ||
1330 | pr_warn("crashkernel: invalid size\n"); | ||
1331 | return -EINVAL; | ||
1332 | } | ||
1333 | |||
1334 | /* match? */ | ||
1335 | if (system_ram >= start && system_ram < end) { | ||
1336 | *crash_size = size; | ||
1337 | break; | ||
1338 | } | ||
1339 | } while (*cur++ == ','); | ||
1340 | |||
1341 | if (*crash_size > 0) { | ||
1342 | while (*cur && *cur != ' ' && *cur != '@') | ||
1343 | cur++; | ||
1344 | if (*cur == '@') { | ||
1345 | cur++; | ||
1346 | *crash_base = memparse(cur, &tmp); | ||
1347 | if (cur == tmp) { | ||
1348 | pr_warn("Memory value expected after '@'\n"); | ||
1349 | return -EINVAL; | ||
1350 | } | ||
1351 | } | ||
1352 | } | ||
1353 | |||
1354 | return 0; | ||
1355 | } | ||
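To make the range syntax concrete, an illustrative command line (example values, not taken from the patch):

    crashkernel=512M-2G:64M,2G-:128M@16M

On a machine with 1G of RAM the first entry matches and 64M is reserved; with 4G of RAM the second entry applies and 128M is reserved, here at the fixed base 16M given after '@'.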
1356 | |||
1357 | /* | ||
1358 | * This function parses "simple" (old) crashkernel command lines like | ||
1359 | * | ||
1360 | * crashkernel=size[@offset] | ||
1361 | * | ||
1362 | * It returns 0 on success and -EINVAL on failure. | ||
1363 | */ | ||
1364 | static int __init parse_crashkernel_simple(char *cmdline, | ||
1365 | unsigned long long *crash_size, | ||
1366 | unsigned long long *crash_base) | ||
1367 | { | ||
1368 | char *cur = cmdline; | ||
1369 | |||
1370 | *crash_size = memparse(cmdline, &cur); | ||
1371 | if (cmdline == cur) { | ||
1372 | pr_warn("crashkernel: memory value expected\n"); | ||
1373 | return -EINVAL; | ||
1374 | } | ||
1375 | |||
1376 | if (*cur == '@') | ||
1377 | *crash_base = memparse(cur+1, &cur); | ||
1378 | else if (*cur != ' ' && *cur != '\0') { | ||
1379 | pr_warn("crashkernel: unrecognized char\n"); | ||
1380 | return -EINVAL; | ||
1381 | } | ||
1382 | |||
1383 | return 0; | ||
1384 | } | ||
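The classic form, for comparison (illustrative values):

    crashkernel=128M          reserve 128M, base chosen by the architecture
    crashkernel=128M@16M      reserve 128M at physical address 16M

Without '@offset' the parser leaves *crash_base at 0 and the architecture code picks a suitable base.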
1385 | |||
1386 | #define SUFFIX_HIGH 0 | ||
1387 | #define SUFFIX_LOW 1 | ||
1388 | #define SUFFIX_NULL 2 | ||
1389 | static __initdata char *suffix_tbl[] = { | ||
1390 | [SUFFIX_HIGH] = ",high", | ||
1391 | [SUFFIX_LOW] = ",low", | ||
1392 | [SUFFIX_NULL] = NULL, | ||
1393 | }; | ||
1394 | |||
1395 | /* | ||
1396 | * This function parses "suffix" crashkernel command lines like | ||
1397 | * | ||
1398 | * crashkernel=size,[high|low] | ||
1399 | * | ||
1400 | * It returns 0 on success and -EINVAL on failure. | ||
1401 | */ | ||
1402 | static int __init parse_crashkernel_suffix(char *cmdline, | ||
1403 | unsigned long long *crash_size, | ||
1404 | const char *suffix) | ||
1405 | { | ||
1406 | char *cur = cmdline; | ||
1407 | |||
1408 | *crash_size = memparse(cmdline, &cur); | ||
1409 | if (cmdline == cur) { | ||
1410 | pr_warn("crashkernel: memory value expected\n"); | ||
1411 | return -EINVAL; | ||
1412 | } | ||
1413 | |||
1414 | /* check with suffix */ | ||
1415 | if (strncmp(cur, suffix, strlen(suffix))) { | ||
1416 | pr_warn("crashkernel: unrecognized char\n"); | ||
1417 | return -EINVAL; | ||
1418 | } | ||
1419 | cur += strlen(suffix); | ||
1420 | if (*cur != ' ' && *cur != '\0') { | ||
1421 | pr_warn("crashkernel: unrecognized char\n"); | ||
1422 | return -EINVAL; | ||
1423 | } | ||
1424 | |||
1425 | return 0; | ||
1426 | } | ||
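And the suffix form (illustrative values; the ,high/,low split is used on e.g. x86_64):

    crashkernel=256M,high     prefer a reservation above 4G
    crashkernel=72M,low       companion low reservation for devices that need DMA below 4G

Note that parse_crashkernel_suffix deliberately takes no crash_base argument: a suffix reservation has no fixed offset.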
1427 | |||
1428 | static __init char *get_last_crashkernel(char *cmdline, | ||
1429 | const char *name, | ||
1430 | const char *suffix) | ||
1431 | { | ||
1432 | char *p = cmdline, *ck_cmdline = NULL; | ||
1433 | |||
1434 | /* find crashkernel and use the last one if there are more */ | ||
1435 | p = strstr(p, name); | ||
1436 | while (p) { | ||
1437 | char *end_p = strchr(p, ' '); | ||
1438 | char *q; | ||
1439 | |||
1440 | if (!end_p) | ||
1441 | end_p = p + strlen(p); | ||
1442 | |||
1443 | if (!suffix) { | ||
1444 | int i; | ||
1445 | |||
1446 | /* skip the one with any known suffix */ | ||
1447 | for (i = 0; suffix_tbl[i]; i++) { | ||
1448 | q = end_p - strlen(suffix_tbl[i]); | ||
1449 | if (!strncmp(q, suffix_tbl[i], | ||
1450 | strlen(suffix_tbl[i]))) | ||
1451 | goto next; | ||
1452 | } | ||
1453 | ck_cmdline = p; | ||
1454 | } else { | ||
1455 | q = end_p - strlen(suffix); | ||
1456 | if (!strncmp(q, suffix, strlen(suffix))) | ||
1457 | ck_cmdline = p; | ||
1458 | } | ||
1459 | next: | ||
1460 | p = strstr(p+1, name); | ||
1461 | } | ||
1462 | |||
1463 | if (!ck_cmdline) | ||
1464 | return NULL; | ||
1465 | |||
1466 | return ck_cmdline; | ||
1467 | } | ||
1468 | |||
1469 | static int __init __parse_crashkernel(char *cmdline, | ||
1470 | unsigned long long system_ram, | ||
1471 | unsigned long long *crash_size, | ||
1472 | unsigned long long *crash_base, | ||
1473 | const char *name, | ||
1474 | const char *suffix) | ||
1475 | { | ||
1476 | char *first_colon, *first_space; | ||
1477 | char *ck_cmdline; | ||
1478 | |||
1479 | BUG_ON(!crash_size || !crash_base); | ||
1480 | *crash_size = 0; | ||
1481 | *crash_base = 0; | ||
1482 | |||
1483 | ck_cmdline = get_last_crashkernel(cmdline, name, suffix); | ||
1484 | |||
1485 | if (!ck_cmdline) | ||
1486 | return -EINVAL; | ||
1487 | |||
1488 | ck_cmdline += strlen(name); | ||
1489 | |||
1490 | if (suffix) | ||
1491 | return parse_crashkernel_suffix(ck_cmdline, crash_size, | ||
1492 | suffix); | ||
1493 | /* | ||
1494 | * if the commandline contains a ':', then that's the extended | ||
1495 | * syntax -- if not, it must be the classic syntax | ||
1496 | */ | ||
1497 | first_colon = strchr(ck_cmdline, ':'); | ||
1498 | first_space = strchr(ck_cmdline, ' '); | ||
1499 | if (first_colon && (!first_space || first_colon < first_space)) | ||
1500 | return parse_crashkernel_mem(ck_cmdline, system_ram, | ||
1501 | crash_size, crash_base); | ||
1502 | |||
1503 | return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); | ||
1504 | } | ||
1505 | |||
1506 | /* | ||
1507 | * This function is the entry point for command line parsing and should be | ||
1508 | * called from the arch-specific code. | ||
1509 | */ | ||
1510 | int __init parse_crashkernel(char *cmdline, | ||
1511 | unsigned long long system_ram, | ||
1512 | unsigned long long *crash_size, | ||
1513 | unsigned long long *crash_base) | ||
1514 | { | ||
1515 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1516 | "crashkernel=", NULL); | ||
1517 | } | ||
1518 | |||
1519 | int __init parse_crashkernel_high(char *cmdline, | ||
1520 | unsigned long long system_ram, | ||
1521 | unsigned long long *crash_size, | ||
1522 | unsigned long long *crash_base) | ||
1523 | { | ||
1524 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1525 | "crashkernel=", suffix_tbl[SUFFIX_HIGH]); | ||
1526 | } | ||
1527 | |||
1528 | int __init parse_crashkernel_low(char *cmdline, | ||
1529 | unsigned long long system_ram, | ||
1530 | unsigned long long *crash_size, | ||
1531 | unsigned long long *crash_base) | ||
1532 | { | ||
1533 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1534 | "crashkernel=", suffix_tbl[SUFFIX_LOW]); | ||
1535 | } | ||
1536 | |||
1537 | static void update_vmcoreinfo_note(void) | ||
1538 | { | ||
1539 | u32 *buf = vmcoreinfo_note; | ||
1540 | |||
1541 | if (!vmcoreinfo_size) | ||
1542 | return; | ||
1543 | buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, | ||
1544 | vmcoreinfo_size); | ||
1545 | final_note(buf); | ||
1546 | } | ||
1547 | |||
1548 | void crash_save_vmcoreinfo(void) | ||
1549 | { | ||
1550 | vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); | ||
1551 | update_vmcoreinfo_note(); | ||
1552 | } | ||
1553 | |||
1554 | void vmcoreinfo_append_str(const char *fmt, ...) | ||
1555 | { | ||
1556 | va_list args; | ||
1557 | char buf[0x50]; | ||
1558 | size_t r; | ||
1559 | |||
1560 | va_start(args, fmt); | ||
1561 | r = vscnprintf(buf, sizeof(buf), fmt, args); | ||
1562 | va_end(args); | ||
1563 | |||
1564 | r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); | ||
1565 | |||
1566 | memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); | ||
1567 | |||
1568 | vmcoreinfo_size += r; | ||
1569 | } | ||
1570 | |||
1571 | /* | ||
1572 | * provide an empty default implementation here -- architecture | ||
1573 | * code may override this | ||
1574 | */ | ||
1575 | void __weak arch_crash_save_vmcoreinfo(void) | ||
1576 | {} | ||
1577 | |||
1578 | unsigned long __weak paddr_vmcoreinfo_note(void) | ||
1579 | { | ||
1580 | return __pa((unsigned long)(char *)&vmcoreinfo_note); | ||
1581 | } | ||
1582 | |||
1583 | static int __init crash_save_vmcoreinfo_init(void) | ||
1584 | { | ||
1585 | VMCOREINFO_OSRELEASE(init_uts_ns.name.release); | ||
1586 | VMCOREINFO_PAGESIZE(PAGE_SIZE); | ||
1587 | |||
1588 | VMCOREINFO_SYMBOL(init_uts_ns); | ||
1589 | VMCOREINFO_SYMBOL(node_online_map); | ||
1590 | #ifdef CONFIG_MMU | ||
1591 | VMCOREINFO_SYMBOL(swapper_pg_dir); | ||
1592 | #endif | ||
1593 | VMCOREINFO_SYMBOL(_stext); | ||
1594 | VMCOREINFO_SYMBOL(vmap_area_list); | ||
1595 | |||
1596 | #ifndef CONFIG_NEED_MULTIPLE_NODES | ||
1597 | VMCOREINFO_SYMBOL(mem_map); | ||
1598 | VMCOREINFO_SYMBOL(contig_page_data); | ||
1599 | #endif | ||
1600 | #ifdef CONFIG_SPARSEMEM | ||
1601 | VMCOREINFO_SYMBOL(mem_section); | ||
1602 | VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); | ||
1603 | VMCOREINFO_STRUCT_SIZE(mem_section); | ||
1604 | VMCOREINFO_OFFSET(mem_section, section_mem_map); | ||
1605 | #endif | ||
1606 | VMCOREINFO_STRUCT_SIZE(page); | ||
1607 | VMCOREINFO_STRUCT_SIZE(pglist_data); | ||
1608 | VMCOREINFO_STRUCT_SIZE(zone); | ||
1609 | VMCOREINFO_STRUCT_SIZE(free_area); | ||
1610 | VMCOREINFO_STRUCT_SIZE(list_head); | ||
1611 | VMCOREINFO_SIZE(nodemask_t); | ||
1612 | VMCOREINFO_OFFSET(page, flags); | ||
1613 | VMCOREINFO_OFFSET(page, _count); | ||
1614 | VMCOREINFO_OFFSET(page, mapping); | ||
1615 | VMCOREINFO_OFFSET(page, lru); | ||
1616 | VMCOREINFO_OFFSET(page, _mapcount); | ||
1617 | VMCOREINFO_OFFSET(page, private); | ||
1618 | VMCOREINFO_OFFSET(pglist_data, node_zones); | ||
1619 | VMCOREINFO_OFFSET(pglist_data, nr_zones); | ||
1620 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
1621 | VMCOREINFO_OFFSET(pglist_data, node_mem_map); | ||
1622 | #endif | ||
1623 | VMCOREINFO_OFFSET(pglist_data, node_start_pfn); | ||
1624 | VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); | ||
1625 | VMCOREINFO_OFFSET(pglist_data, node_id); | ||
1626 | VMCOREINFO_OFFSET(zone, free_area); | ||
1627 | VMCOREINFO_OFFSET(zone, vm_stat); | ||
1628 | VMCOREINFO_OFFSET(zone, spanned_pages); | ||
1629 | VMCOREINFO_OFFSET(free_area, free_list); | ||
1630 | VMCOREINFO_OFFSET(list_head, next); | ||
1631 | VMCOREINFO_OFFSET(list_head, prev); | ||
1632 | VMCOREINFO_OFFSET(vmap_area, va_start); | ||
1633 | VMCOREINFO_OFFSET(vmap_area, list); | ||
1634 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); | ||
1635 | log_buf_kexec_setup(); | ||
1636 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); | ||
1637 | VMCOREINFO_NUMBER(NR_FREE_PAGES); | ||
1638 | VMCOREINFO_NUMBER(PG_lru); | ||
1639 | VMCOREINFO_NUMBER(PG_private); | ||
1640 | VMCOREINFO_NUMBER(PG_swapcache); | ||
1641 | VMCOREINFO_NUMBER(PG_slab); | ||
1642 | #ifdef CONFIG_MEMORY_FAILURE | ||
1643 | VMCOREINFO_NUMBER(PG_hwpoison); | ||
1644 | #endif | ||
1645 | VMCOREINFO_NUMBER(PG_head_mask); | ||
1646 | VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); | ||
1647 | #ifdef CONFIG_HUGETLBFS | ||
1648 | VMCOREINFO_SYMBOL(free_huge_page); | ||
1649 | #endif | ||
1650 | |||
1651 | arch_crash_save_vmcoreinfo(); | ||
1652 | update_vmcoreinfo_note(); | ||
1653 | |||
1654 | return 0; | ||
1655 | } | ||
1656 | |||
1657 | subsys_initcall(crash_save_vmcoreinfo_init); | ||
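For orientation (my annotation; these macros live in include/linux/kexec.h and are not part of this hunk), the VMCOREINFO_* helpers used above boil down to vmcoreinfo_append_str() calls that emit one text line per item, roughly:

    #define VMCOREINFO_SYMBOL(name) \
    	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
    #define VMCOREINFO_OFFSET(name, field) \
    	vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
    			      (unsigned long)offsetof(struct name, field))

Dump tools such as makedumpfile read these lines back out of the vmcore note to navigate the crashed kernel's data structures without needing its debuginfo.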
1658 | |||
1659 | /* | ||
1660 | * Move into place and start executing a preloaded standalone | ||
1661 | * executable. If nothing was preloaded return an error. | ||
1662 | */ | ||
1663 | int kernel_kexec(void) | ||
1664 | { | ||
1665 | int error = 0; | ||
1666 | |||
1667 | if (!mutex_trylock(&kexec_mutex)) | ||
1668 | return -EBUSY; | ||
1669 | if (!kexec_image) { | ||
1670 | error = -EINVAL; | ||
1671 | goto Unlock; | ||
1672 | } | ||
1673 | |||
1674 | #ifdef CONFIG_KEXEC_JUMP | ||
1675 | if (kexec_image->preserve_context) { | ||
1676 | lock_system_sleep(); | ||
1677 | pm_prepare_console(); | ||
1678 | error = freeze_processes(); | ||
1679 | if (error) { | ||
1680 | error = -EBUSY; | ||
1681 | goto Restore_console; | ||
1682 | } | ||
1683 | suspend_console(); | ||
1684 | error = dpm_suspend_start(PMSG_FREEZE); | ||
1685 | if (error) | ||
1686 | goto Resume_console; | ||
1687 | /* At this point, dpm_suspend_start() has been called, | ||
1688 | * but *not* dpm_suspend_end(). We *must* call | ||
1689 | * dpm_suspend_end() now. Otherwise, drivers for | ||
1690 | * some devices (e.g. interrupt controllers) become | ||
1691 | * desynchronized with the actual state of the | ||
1692 | * hardware at resume time, and evil weirdness ensues. | ||
1693 | */ | ||
1694 | error = dpm_suspend_end(PMSG_FREEZE); | ||
1695 | if (error) | ||
1696 | goto Resume_devices; | ||
1697 | error = disable_nonboot_cpus(); | ||
1698 | if (error) | ||
1699 | goto Enable_cpus; | ||
1700 | local_irq_disable(); | ||
1701 | error = syscore_suspend(); | ||
1702 | if (error) | ||
1703 | goto Enable_irqs; | ||
1704 | } else | ||
1705 | #endif | ||
1706 | { | ||
1707 | kexec_in_progress = true; | ||
1708 | kernel_restart_prepare(NULL); | ||
1709 | migrate_to_reboot_cpu(); | ||
1710 | |||
1711 | /* | ||
1712 | * migrate_to_reboot_cpu() disables CPU hotplug assuming that | ||
1713 | * no further code needs to use CPU hotplug (which is true in | ||
1714 | * the reboot case). However, the kexec path depends on using | ||
1715 | * CPU hotplug again; so re-enable it here. | ||
1716 | */ | ||
1717 | cpu_hotplug_enable(); | ||
1718 | pr_emerg("Starting new kernel\n"); | ||
1719 | machine_shutdown(); | ||
1720 | } | ||
1721 | |||
1722 | machine_kexec(kexec_image); | ||
1723 | |||
1724 | #ifdef CONFIG_KEXEC_JUMP | ||
1725 | if (kexec_image->preserve_context) { | ||
1726 | syscore_resume(); | ||
1727 | Enable_irqs: | ||
1728 | local_irq_enable(); | ||
1729 | Enable_cpus: | ||
1730 | enable_nonboot_cpus(); | ||
1731 | dpm_resume_start(PMSG_RESTORE); | ||
1732 | Resume_devices: | ||
1733 | dpm_resume_end(PMSG_RESTORE); | ||
1734 | Resume_console: | ||
1735 | resume_console(); | ||
1736 | thaw_processes(); | ||
1737 | Restore_console: | ||
1738 | pm_restore_console(); | ||
1739 | unlock_system_sleep(); | ||
1740 | } | ||
1741 | #endif | ||
1742 | |||
1743 | Unlock: | ||
1744 | mutex_unlock(&kexec_mutex); | ||
1745 | return error; | ||
1746 | } | ||
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c new file mode 100644 index 000000000000..9aa25c034b2e --- /dev/null +++ b/kernel/kexec_core.c | |||
@@ -0,0 +1,1511 @@ | |||
1 | /* | ||
2 | * kexec_core.c - kexec system call core code. | ||
3 | * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> | ||
4 | * | ||
5 | * This source code is licensed under the GNU General Public License, | ||
6 | * Version 2. See the file COPYING for more details. | ||
7 | */ | ||
8 | |||
9 | #define pr_fmt(fmt) "kexec: " fmt | ||
10 | |||
11 | #include <linux/capability.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/file.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/kexec.h> | ||
17 | #include <linux/mutex.h> | ||
18 | #include <linux/list.h> | ||
19 | #include <linux/highmem.h> | ||
20 | #include <linux/syscalls.h> | ||
21 | #include <linux/reboot.h> | ||
22 | #include <linux/ioport.h> | ||
23 | #include <linux/hardirq.h> | ||
24 | #include <linux/elf.h> | ||
25 | #include <linux/elfcore.h> | ||
26 | #include <linux/utsname.h> | ||
27 | #include <linux/numa.h> | ||
28 | #include <linux/suspend.h> | ||
29 | #include <linux/device.h> | ||
30 | #include <linux/freezer.h> | ||
31 | #include <linux/pm.h> | ||
32 | #include <linux/cpu.h> | ||
33 | #include <linux/uaccess.h> | ||
34 | #include <linux/io.h> | ||
35 | #include <linux/console.h> | ||
36 | #include <linux/vmalloc.h> | ||
37 | #include <linux/swap.h> | ||
38 | #include <linux/syscore_ops.h> | ||
39 | #include <linux/compiler.h> | ||
40 | #include <linux/hugetlb.h> | ||
41 | |||
42 | #include <asm/page.h> | ||
43 | #include <asm/sections.h> | ||
44 | |||
45 | #include <crypto/hash.h> | ||
46 | #include <crypto/sha.h> | ||
47 | #include "kexec_internal.h" | ||
48 | |||
49 | DEFINE_MUTEX(kexec_mutex); | ||
50 | |||
51 | /* Per cpu memory for storing cpu states in case of system crash. */ | ||
52 | note_buf_t __percpu *crash_notes; | ||
53 | |||
54 | /* vmcoreinfo stuff */ | ||
55 | static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; | ||
56 | u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; | ||
57 | size_t vmcoreinfo_size; | ||
58 | size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); | ||
59 | |||
60 | /* Flag to indicate we are going to kexec a new kernel */ | ||
61 | bool kexec_in_progress = false; | ||
62 | |||
63 | |||
64 | /* Location of the reserved area for the crash kernel */ | ||
65 | struct resource crashk_res = { | ||
66 | .name = "Crash kernel", | ||
67 | .start = 0, | ||
68 | .end = 0, | ||
69 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
70 | }; | ||
71 | struct resource crashk_low_res = { | ||
72 | .name = "Crash kernel", | ||
73 | .start = 0, | ||
74 | .end = 0, | ||
75 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
76 | }; | ||
77 | |||
78 | int kexec_should_crash(struct task_struct *p) | ||
79 | { | ||
80 | /* | ||
81 | * If crash_kexec_post_notifiers is enabled, don't run | ||
82 | * crash_kexec() here yet, which must be run after panic | ||
83 | * notifiers in panic(). | ||
84 | */ | ||
85 | if (crash_kexec_post_notifiers) | ||
86 | return 0; | ||
87 | /* | ||
88 | * There are 4 panic() calls in do_exit() path, each of which | ||
89 | * corresponds to each of these 4 conditions. | ||
90 | */ | ||
91 | if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) | ||
92 | return 1; | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * When kexec transitions to the new kernel there is a one-to-one | ||
98 | * mapping between physical and virtual addresses. On processors | ||
99 | * where you can disable the MMU this is trivial and easy. For | ||
100 | * others it is still a simple predictable page table to setup. | ||
101 | * | ||
102 | * In that environment kexec copies the new kernel to its final | ||
103 | * resting place. This means I can only support memory whose | ||
104 | * physical address can fit in an unsigned long. In particular | ||
105 | * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. | ||
106 | * If the assembly stub has more restrictive requirements | ||
107 | * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be | ||
108 | * defined more restrictively in <asm/kexec.h>. | ||
109 | * | ||
110 | * The code for the transition from the current kernel to | ||
111 | * the new kernel is placed in the control_code_buffer, whose size | ||
112 | * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single | ||
113 | * page of memory is necessary, but some architectures require more. | ||
114 | * Because this memory must be identity mapped in the transition from | ||
115 | * virtual to physical addresses it must live in the range | ||
116 | * 0 - TASK_SIZE, as only the user space mappings are arbitrarily | ||
117 | * modifiable. | ||
118 | * | ||
119 | * The assembly stub in the control code buffer is passed a linked list | ||
120 | * of descriptor pages detailing the source pages of the new kernel, | ||
121 | * and the destination addresses of those source pages. As this data | ||
122 | * structure is not used in the context of the current OS, it must | ||
123 | * be self-contained. | ||
124 | * | ||
125 | * The code has been made to work with highmem pages and will use a | ||
126 | * destination page in its final resting place (if it happens | ||
127 | * to allocate it). The end product of this is that most of the | ||
128 | * physical address space, and most of RAM can be used. | ||
129 | * | ||
130 | * Future directions include: | ||
131 | * - allocating a page table with the control code buffer identity | ||
132 | * mapped, to simplify machine_kexec and make kexec_on_panic more | ||
133 | * reliable. | ||
134 | */ | ||
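As a reading aid for the descriptor list described above, a sketch of the entry encoding (the IND_* flags come from include/linux/kexec.h): each kimage_entry_t is a page-aligned physical address with a type tag in its low bits.

    /*
     * dest | IND_DESTINATION : set the current destination address
     * src  | IND_SOURCE      : copy this source page to the destination,
     *                          then advance the destination one page
     * next | IND_INDIRECTION : continue reading entries from this page
     * IND_DONE               : end of the list
     */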
135 | |||
136 | /* | ||
137 | * KIMAGE_NO_DEST is an impossible destination address, used for | ||
138 | * allocating pages whose destination address we do not care about. | ||
139 | */ | ||
140 | #define KIMAGE_NO_DEST (-1UL) | ||
141 | |||
142 | static struct page *kimage_alloc_page(struct kimage *image, | ||
143 | gfp_t gfp_mask, | ||
144 | unsigned long dest); | ||
145 | |||
146 | int sanity_check_segment_list(struct kimage *image) | ||
147 | { | ||
148 | int result, i; | ||
149 | unsigned long nr_segments = image->nr_segments; | ||
150 | |||
151 | /* | ||
152 | * Verify we have good destination addresses. The caller is | ||
153 | * responsible for making certain we don't attempt to load | ||
154 | * the new image into invalid or reserved areas of RAM. This | ||
155 | * just verifies it is an address we can use. | ||
156 | * | ||
157 | * Since the kernel does everything in page size chunks ensure | ||
158 | * the destination addresses are page aligned. Too many | ||
159 | * special cases crop up when we don't do this. The most | ||
160 | * insidious is getting overlapping destination addresses | ||
161 | * simply because addresses are changed to page size | ||
162 | * granularity. | ||
163 | */ | ||
164 | result = -EADDRNOTAVAIL; | ||
165 | for (i = 0; i < nr_segments; i++) { | ||
166 | unsigned long mstart, mend; | ||
167 | |||
168 | mstart = image->segment[i].mem; | ||
169 | mend = mstart + image->segment[i].memsz; | ||
170 | if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) | ||
171 | return result; | ||
172 | if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) | ||
173 | return result; | ||
174 | } | ||
175 | |||
176 | /* Verify our destination addresses do not overlap. | ||
177 | * If we allowed overlapping destination addresses | ||
178 | * through, very weird things can happen with no | ||
179 | * easy explanation as one segment stomps on another. | ||
180 | */ | ||
181 | result = -EINVAL; | ||
182 | for (i = 0; i < nr_segments; i++) { | ||
183 | unsigned long mstart, mend; | ||
184 | unsigned long j; | ||
185 | |||
186 | mstart = image->segment[i].mem; | ||
187 | mend = mstart + image->segment[i].memsz; | ||
188 | for (j = 0; j < i; j++) { | ||
189 | unsigned long pstart, pend; | ||
190 | |||
191 | pstart = image->segment[j].mem; | ||
192 | pend = pstart + image->segment[j].memsz; | ||
193 | /* Do the segments overlap ? */ | ||
194 | if ((mend > pstart) && (mstart < pend)) | ||
195 | return result; | ||
196 | } | ||
197 | } | ||
198 | |||
199 | /* Ensure our buffer sizes do not exceed | ||
200 | * our memory sizes. This should always be the case, | ||
201 | * and it is easier to check up front than to be surprised | ||
202 | * later on. | ||
203 | */ | ||
204 | result = -EINVAL; | ||
205 | for (i = 0; i < nr_segments; i++) { | ||
206 | if (image->segment[i].bufsz > image->segment[i].memsz) | ||
207 | return result; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Verify we have good destination addresses. Normally | ||
212 | * the caller is responsible for making certain we don't | ||
213 | * attempt to load the new image into invalid or reserved | ||
214 | * areas of RAM. But crash kernels are preloaded into a | ||
215 | * reserved area of RAM. We must ensure the addresses | ||
216 | * are in the reserved area, otherwise preloading the | ||
217 | * kernel could corrupt things. | ||
218 | */ | ||
219 | |||
220 | if (image->type == KEXEC_TYPE_CRASH) { | ||
221 | result = -EADDRNOTAVAIL; | ||
222 | for (i = 0; i < nr_segments; i++) { | ||
223 | unsigned long mstart, mend; | ||
224 | |||
225 | mstart = image->segment[i].mem; | ||
226 | mend = mstart + image->segment[i].memsz - 1; | ||
227 | /* Ensure we are within the crash kernel limits */ | ||
228 | if ((mstart < crashk_res.start) || | ||
229 | (mend > crashk_res.end)) | ||
230 | return result; | ||
231 | } | ||
232 | } | ||
233 | |||
234 | return 0; | ||
235 | } | ||
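The overlap test above is the standard half-open interval check; a worked example (illustrative numbers): segments [0x1000, 0x3000) and [0x2000, 0x5000) are rejected because 0x3000 > 0x2000 and 0x1000 < 0x5000, while back-to-back segments [0x1000, 0x2000) and [0x2000, 0x3000) pass.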
236 | |||
237 | struct kimage *do_kimage_alloc_init(void) | ||
238 | { | ||
239 | struct kimage *image; | ||
240 | |||
241 | /* Allocate a controlling structure */ | ||
242 | image = kzalloc(sizeof(*image), GFP_KERNEL); | ||
243 | if (!image) | ||
244 | return NULL; | ||
245 | |||
246 | image->head = 0; | ||
247 | image->entry = &image->head; | ||
248 | image->last_entry = &image->head; | ||
249 | image->control_page = ~0; /* By default this does not apply */ | ||
250 | image->type = KEXEC_TYPE_DEFAULT; | ||
251 | |||
252 | /* Initialize the list of control pages */ | ||
253 | INIT_LIST_HEAD(&image->control_pages); | ||
254 | |||
255 | /* Initialize the list of destination pages */ | ||
256 | INIT_LIST_HEAD(&image->dest_pages); | ||
257 | |||
258 | /* Initialize the list of unusable pages */ | ||
259 | INIT_LIST_HEAD(&image->unusable_pages); | ||
260 | |||
261 | return image; | ||
262 | } | ||
263 | |||
264 | int kimage_is_destination_range(struct kimage *image, | ||
265 | unsigned long start, | ||
266 | unsigned long end) | ||
267 | { | ||
268 | unsigned long i; | ||
269 | |||
270 | for (i = 0; i < image->nr_segments; i++) { | ||
271 | unsigned long mstart, mend; | ||
272 | |||
273 | mstart = image->segment[i].mem; | ||
274 | mend = mstart + image->segment[i].memsz; | ||
275 | if ((end > mstart) && (start < mend)) | ||
276 | return 1; | ||
277 | } | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) | ||
283 | { | ||
284 | struct page *pages; | ||
285 | |||
286 | pages = alloc_pages(gfp_mask, order); | ||
287 | if (pages) { | ||
288 | unsigned int count, i; | ||
289 | |||
290 | pages->mapping = NULL; | ||
291 | set_page_private(pages, order); | ||
292 | count = 1 << order; | ||
293 | for (i = 0; i < count; i++) | ||
294 | SetPageReserved(pages + i); | ||
295 | } | ||
296 | |||
297 | return pages; | ||
298 | } | ||
299 | |||
300 | static void kimage_free_pages(struct page *page) | ||
301 | { | ||
302 | unsigned int order, count, i; | ||
303 | |||
304 | order = page_private(page); | ||
305 | count = 1 << order; | ||
306 | for (i = 0; i < count; i++) | ||
307 | ClearPageReserved(page + i); | ||
308 | __free_pages(page, order); | ||
309 | } | ||
310 | |||
311 | void kimage_free_page_list(struct list_head *list) | ||
312 | { | ||
313 | struct list_head *pos, *next; | ||
314 | |||
315 | list_for_each_safe(pos, next, list) { | ||
316 | struct page *page; | ||
317 | |||
318 | page = list_entry(pos, struct page, lru); | ||
319 | list_del(&page->lru); | ||
320 | kimage_free_pages(page); | ||
321 | } | ||
322 | } | ||
323 | |||
324 | static struct page *kimage_alloc_normal_control_pages(struct kimage *image, | ||
325 | unsigned int order) | ||
326 | { | ||
327 | /* Control pages are special: they are the intermediaries | ||
328 | * that are needed while we copy the rest of the pages | ||
329 | * to their final resting place. As such they must | ||
330 | * not conflict with either the destination addresses | ||
331 | * or memory the kernel is already using. | ||
332 | * | ||
333 | * The only case where we really need more than one of | ||
334 | * these are for architectures where we cannot disable | ||
335 | * the MMU and must instead generate an identity mapped | ||
336 | * page table for all of the memory. | ||
337 | * | ||
338 | * At worst this runs in O(N) of the image size. | ||
339 | */ | ||
340 | struct list_head extra_pages; | ||
341 | struct page *pages; | ||
342 | unsigned int count; | ||
343 | |||
344 | count = 1 << order; | ||
345 | INIT_LIST_HEAD(&extra_pages); | ||
346 | |||
347 | /* Loop while I can allocate a page and the page allocated | ||
348 | * is a destination page. | ||
349 | */ | ||
350 | do { | ||
351 | unsigned long pfn, epfn, addr, eaddr; | ||
352 | |||
353 | pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); | ||
354 | if (!pages) | ||
355 | break; | ||
356 | pfn = page_to_pfn(pages); | ||
357 | epfn = pfn + count; | ||
358 | addr = pfn << PAGE_SHIFT; | ||
359 | eaddr = epfn << PAGE_SHIFT; | ||
360 | if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || | ||
361 | kimage_is_destination_range(image, addr, eaddr)) { | ||
362 | list_add(&pages->lru, &extra_pages); | ||
363 | pages = NULL; | ||
364 | } | ||
365 | } while (!pages); | ||
366 | |||
367 | if (pages) { | ||
368 | /* Remember the allocated page... */ | ||
369 | list_add(&pages->lru, &image->control_pages); | ||
370 | |||
371 | /* Because the page is already in its destination | ||
372 | * location we will never allocate another page at | ||
373 | * that address. Therefore kimage_alloc_pages | ||
374 | * will not return it (again) and we don't need | ||
375 | * to give it an entry in image->segment[]. | ||
376 | */ | ||
377 | } | ||
378 | /* Deal with the destination pages I have inadvertently allocated. | ||
379 | * | ||
380 | * Ideally I would convert multi-page allocations into single | ||
381 | * page allocations, and add everything to image->dest_pages. | ||
382 | * | ||
383 | * For now it is simpler to just free the pages. | ||
384 | */ | ||
385 | kimage_free_page_list(&extra_pages); | ||
386 | |||
387 | return pages; | ||
388 | } | ||
389 | |||
390 | static struct page *kimage_alloc_crash_control_pages(struct kimage *image, | ||
391 | unsigned int order) | ||
392 | { | ||
393 | /* Control pages are special: they are the intermediaries | ||
394 | * that are needed while we copy the rest of the pages | ||
395 | * to their final resting place. As such they must | ||
396 | * not conflict with either the destination addresses | ||
397 | * or memory the kernel is already using. | ||
398 | * | ||
399 | * Control pages are also the only pages we must allocate | ||
400 | * when loading a crash kernel. All of the other pages | ||
401 | * are specified by the segments and we just memcpy | ||
402 | * into them directly. | ||
403 | * | ||
404 | * The only case where we really need more than one of | ||
405 | * these are for architectures where we cannot disable | ||
406 | * the MMU and must instead generate an identity mapped | ||
407 | * page table for all of the memory. | ||
408 | * | ||
409 | * Given the low demand this implements a very simple | ||
410 | * allocator that finds the first hole of the appropriate | ||
411 | * size in the reserved memory region, and allocates all | ||
412 | * of the memory up to and including the hole. | ||
413 | */ | ||
414 | unsigned long hole_start, hole_end, size; | ||
415 | struct page *pages; | ||
416 | |||
417 | pages = NULL; | ||
418 | size = (1 << order) << PAGE_SHIFT; | ||
419 | hole_start = (image->control_page + (size - 1)) & ~(size - 1); | ||
420 | hole_end = hole_start + size - 1; | ||
421 | while (hole_end <= crashk_res.end) { | ||
422 | unsigned long i; | ||
423 | |||
424 | if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) | ||
425 | break; | ||
426 | /* See if I overlap any of the segments */ | ||
427 | for (i = 0; i < image->nr_segments; i++) { | ||
428 | unsigned long mstart, mend; | ||
429 | |||
430 | mstart = image->segment[i].mem; | ||
431 | mend = mstart + image->segment[i].memsz - 1; | ||
432 | if ((hole_end >= mstart) && (hole_start <= mend)) { | ||
433 | /* Advance the hole to the end of the segment */ | ||
434 | hole_start = (mend + (size - 1)) & ~(size - 1); | ||
435 | hole_end = hole_start + size - 1; | ||
436 | break; | ||
437 | } | ||
438 | } | ||
439 | /* If I don't overlap any segments I have found my hole! */ | ||
440 | if (i == image->nr_segments) { | ||
441 | pages = pfn_to_page(hole_start >> PAGE_SHIFT); | ||
442 | break; | ||
443 | } | ||
444 | } | ||
445 | if (pages) | ||
446 | image->control_page = hole_end; | ||
447 | |||
448 | return pages; | ||
449 | } | ||
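The rounding in the hole search is plain power-of-two alignment, so every candidate hole stays naturally aligned to its own size. On example numbers (illustrative): for order = 1 with 4K pages, size is 8K = 0x2000, and a control_page cursor of 0x5000 gives hole_start = (0x5000 + 0x1fff) & ~0x1fff = 0x6000, the next 8K boundary.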
450 | |||
451 | |||
452 | struct page *kimage_alloc_control_pages(struct kimage *image, | ||
453 | unsigned int order) | ||
454 | { | ||
455 | struct page *pages = NULL; | ||
456 | |||
457 | switch (image->type) { | ||
458 | case KEXEC_TYPE_DEFAULT: | ||
459 | pages = kimage_alloc_normal_control_pages(image, order); | ||
460 | break; | ||
461 | case KEXEC_TYPE_CRASH: | ||
462 | pages = kimage_alloc_crash_control_pages(image, order); | ||
463 | break; | ||
464 | } | ||
465 | |||
466 | return pages; | ||
467 | } | ||
468 | |||
469 | static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) | ||
470 | { | ||
471 | if (*image->entry != 0) | ||
472 | image->entry++; | ||
473 | |||
474 | if (image->entry == image->last_entry) { | ||
475 | kimage_entry_t *ind_page; | ||
476 | struct page *page; | ||
477 | |||
478 | page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); | ||
479 | if (!page) | ||
480 | return -ENOMEM; | ||
481 | |||
482 | ind_page = page_address(page); | ||
483 | *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; | ||
484 | image->entry = ind_page; | ||
485 | image->last_entry = ind_page + | ||
486 | ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); | ||
487 | } | ||
488 | *image->entry = entry; | ||
489 | image->entry++; | ||
490 | *image->entry = 0; | ||
491 | |||
492 | return 0; | ||
493 | } | ||
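A capacity check on the chaining arithmetic above (illustrative, assuming 4K pages and 8-byte entries): PAGE_SIZE/sizeof(kimage_entry_t) is 512, so last_entry points at slot 511; when the cursor reaches it, a fresh page is linked in via IND_INDIRECTION, leaving each indirection page with 511 payload entries plus one slot for the link or the final IND_DONE.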
494 | |||
495 | static int kimage_set_destination(struct kimage *image, | ||
496 | unsigned long destination) | ||
497 | { | ||
498 | int result; | ||
499 | |||
500 | destination &= PAGE_MASK; | ||
501 | result = kimage_add_entry(image, destination | IND_DESTINATION); | ||
502 | |||
503 | return result; | ||
504 | } | ||
505 | |||
506 | |||
507 | static int kimage_add_page(struct kimage *image, unsigned long page) | ||
508 | { | ||
509 | int result; | ||
510 | |||
511 | page &= PAGE_MASK; | ||
512 | result = kimage_add_entry(image, page | IND_SOURCE); | ||
513 | |||
514 | return result; | ||
515 | } | ||
516 | |||
517 | |||
518 | static void kimage_free_extra_pages(struct kimage *image) | ||
519 | { | ||
520 | /* Walk through and free any extra destination pages I may have */ | ||
521 | kimage_free_page_list(&image->dest_pages); | ||
522 | |||
523 | /* Walk through and free any unusable pages I have cached */ | ||
524 | kimage_free_page_list(&image->unusable_pages); | ||
525 | } | ||
526 | |||
527 | void kimage_terminate(struct kimage *image) | ||
528 | { | ||
529 | if (*image->entry != 0) | ||
530 | image->entry++; | ||
531 | |||
532 | *image->entry = IND_DONE; | ||
533 | } | ||
534 | |||
535 | #define for_each_kimage_entry(image, ptr, entry) \ | ||
536 | for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ | ||
537 | ptr = (entry & IND_INDIRECTION) ? \ | ||
538 | phys_to_virt((entry & PAGE_MASK)) : ptr + 1) | ||
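A minimal usage sketch for the iterator (my example, not in the patch; it relies only on the macro and flag bits already used in this file):

    /* Illustrative only: count the source pages recorded in an image. */
    static unsigned long kimage_count_source_pages(struct kimage *image)
    {
    	kimage_entry_t *ptr, entry;
    	unsigned long nr = 0;

    	for_each_kimage_entry(image, ptr, entry)
    		if (entry & IND_SOURCE)
    			nr++;

    	return nr;
    }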
539 | |||
540 | static void kimage_free_entry(kimage_entry_t entry) | ||
541 | { | ||
542 | struct page *page; | ||
543 | |||
544 | page = pfn_to_page(entry >> PAGE_SHIFT); | ||
545 | kimage_free_pages(page); | ||
546 | } | ||
547 | |||
548 | void kimage_free(struct kimage *image) | ||
549 | { | ||
550 | kimage_entry_t *ptr, entry; | ||
551 | kimage_entry_t ind = 0; | ||
552 | |||
553 | if (!image) | ||
554 | return; | ||
555 | |||
556 | kimage_free_extra_pages(image); | ||
557 | for_each_kimage_entry(image, ptr, entry) { | ||
558 | if (entry & IND_INDIRECTION) { | ||
559 | /* Free the previous indirection page */ | ||
560 | if (ind & IND_INDIRECTION) | ||
561 | kimage_free_entry(ind); | ||
562 | /* Save this indirection page until we are | ||
563 | * done with it. | ||
564 | */ | ||
565 | ind = entry; | ||
566 | } else if (entry & IND_SOURCE) | ||
567 | kimage_free_entry(entry); | ||
568 | } | ||
569 | /* Free the final indirection page */ | ||
570 | if (ind & IND_INDIRECTION) | ||
571 | kimage_free_entry(ind); | ||
572 | |||
573 | /* Handle any machine specific cleanup */ | ||
574 | machine_kexec_cleanup(image); | ||
575 | |||
576 | /* Free the kexec control pages... */ | ||
577 | kimage_free_page_list(&image->control_pages); | ||
578 | |||
579 | /* | ||
580 | * Free up any temporary buffers allocated. This is hit if an | ||
581 | * error occurred long after buffer allocation. | ||
582 | */ | ||
583 | if (image->file_mode) | ||
584 | kimage_file_post_load_cleanup(image); | ||
585 | |||
586 | kfree(image); | ||
587 | } | ||
588 | |||
589 | static kimage_entry_t *kimage_dst_used(struct kimage *image, | ||
590 | unsigned long page) | ||
591 | { | ||
592 | kimage_entry_t *ptr, entry; | ||
593 | unsigned long destination = 0; | ||
594 | |||
595 | for_each_kimage_entry(image, ptr, entry) { | ||
596 | if (entry & IND_DESTINATION) | ||
597 | destination = entry & PAGE_MASK; | ||
598 | else if (entry & IND_SOURCE) { | ||
599 | if (page == destination) | ||
600 | return ptr; | ||
601 | destination += PAGE_SIZE; | ||
602 | } | ||
603 | } | ||
604 | |||
605 | return NULL; | ||
606 | } | ||
607 | |||
608 | static struct page *kimage_alloc_page(struct kimage *image, | ||
609 | gfp_t gfp_mask, | ||
610 | unsigned long destination) | ||
611 | { | ||
612 | /* | ||
613 | * Here we implement safeguards to ensure that a source page | ||
614 | * is not copied to its destination page before the data on | ||
615 | * the destination page is no longer useful. | ||
616 | * | ||
617 | * To do this we maintain the invariant that a source page is | ||
618 | * either its own destination page, or it is not a | ||
619 | * destination page at all. | ||
620 | * | ||
621 | * That is slightly stronger than required, but the proof | ||
622 | * that no problems will occur is trivial, and the | ||
623 | * implementation is simple to verify. | ||
624 | * | ||
625 | * When allocating all pages normally this algorithm will run | ||
626 | * in O(N) time, but in the worst case it will run in O(N^2) | ||
627 | * time. If the runtime is a problem the data structures can | ||
628 | * be fixed. | ||
629 | */ | ||
630 | struct page *page; | ||
631 | unsigned long addr; | ||
632 | |||
633 | /* | ||
634 | * Walk through the list of destination pages, and see if I | ||
635 | * have a match. | ||
636 | */ | ||
637 | list_for_each_entry(page, &image->dest_pages, lru) { | ||
638 | addr = page_to_pfn(page) << PAGE_SHIFT; | ||
639 | if (addr == destination) { | ||
640 | list_del(&page->lru); | ||
641 | return page; | ||
642 | } | ||
643 | } | ||
644 | page = NULL; | ||
645 | while (1) { | ||
646 | kimage_entry_t *old; | ||
647 | |||
648 | /* Allocate a page, if we run out of memory give up */ | ||
649 | page = kimage_alloc_pages(gfp_mask, 0); | ||
650 | if (!page) | ||
651 | return NULL; | ||
652 | /* If the page cannot be used file it away */ | ||
653 | if (page_to_pfn(page) > | ||
654 | (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { | ||
655 | list_add(&page->lru, &image->unusable_pages); | ||
656 | continue; | ||
657 | } | ||
658 | addr = page_to_pfn(page) << PAGE_SHIFT; | ||
659 | |||
660 | /* If it is the destination page we want, use it */ | ||
661 | if (addr == destination) | ||
662 | break; | ||
663 | |||
664 | /* If the page is not a destination page use it */ | ||
665 | if (!kimage_is_destination_range(image, addr, | ||
666 | addr + PAGE_SIZE)) | ||
667 | break; | ||
668 | |||
669 | /* | ||
670 | * I know that the page is someone's destination page. | ||
671 | * See if there is already a source page for this | ||
672 | * destination page. And if so swap the source pages. | ||
673 | */ | ||
674 | old = kimage_dst_used(image, addr); | ||
675 | if (old) { | ||
676 | /* If so move it */ | ||
677 | unsigned long old_addr; | ||
678 | struct page *old_page; | ||
679 | |||
680 | old_addr = *old & PAGE_MASK; | ||
681 | old_page = pfn_to_page(old_addr >> PAGE_SHIFT); | ||
682 | copy_highpage(page, old_page); | ||
683 | *old = addr | (*old & ~PAGE_MASK); | ||
684 | |||
685 | /* The old page I have found cannot be a | ||
686 | * destination page, so return it if its | ||
687 | * gfp_flags honor the ones passed in. | ||
688 | */ | ||
689 | if (!(gfp_mask & __GFP_HIGHMEM) && | ||
690 | PageHighMem(old_page)) { | ||
691 | kimage_free_pages(old_page); | ||
692 | continue; | ||
693 | } | ||
694 | addr = old_addr; | ||
695 | page = old_page; | ||
696 | break; | ||
697 | } | ||
698 | /* Place the page on the destination list, to be used later */ | ||
699 | list_add(&page->lru, &image->dest_pages); | ||
700 | } | ||
701 | |||
702 | return page; | ||
703 | } | ||
704 | |||
705 | static int kimage_load_normal_segment(struct kimage *image, | ||
706 | struct kexec_segment *segment) | ||
707 | { | ||
708 | unsigned long maddr; | ||
709 | size_t ubytes, mbytes; | ||
710 | int result; | ||
711 | unsigned char __user *buf = NULL; | ||
712 | unsigned char *kbuf = NULL; | ||
713 | |||
714 | result = 0; | ||
715 | if (image->file_mode) | ||
716 | kbuf = segment->kbuf; | ||
717 | else | ||
718 | buf = segment->buf; | ||
719 | ubytes = segment->bufsz; | ||
720 | mbytes = segment->memsz; | ||
721 | maddr = segment->mem; | ||
722 | |||
723 | result = kimage_set_destination(image, maddr); | ||
724 | if (result < 0) | ||
725 | goto out; | ||
726 | |||
727 | while (mbytes) { | ||
728 | struct page *page; | ||
729 | char *ptr; | ||
730 | size_t uchunk, mchunk; | ||
731 | |||
732 | page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); | ||
733 | if (!page) { | ||
734 | result = -ENOMEM; | ||
735 | goto out; | ||
736 | } | ||
737 | result = kimage_add_page(image, page_to_pfn(page) | ||
738 | << PAGE_SHIFT); | ||
739 | if (result < 0) | ||
740 | goto out; | ||
741 | |||
742 | ptr = kmap(page); | ||
743 | /* Start with a clear page */ | ||
744 | clear_page(ptr); | ||
745 | ptr += maddr & ~PAGE_MASK; | ||
746 | mchunk = min_t(size_t, mbytes, | ||
747 | PAGE_SIZE - (maddr & ~PAGE_MASK)); | ||
748 | uchunk = min(ubytes, mchunk); | ||
749 | |||
750 | /* For file based kexec, source pages are in kernel memory */ | ||
751 | if (image->file_mode) | ||
752 | memcpy(ptr, kbuf, uchunk); | ||
753 | else | ||
754 | result = copy_from_user(ptr, buf, uchunk); | ||
755 | kunmap(page); | ||
756 | if (result) { | ||
757 | result = -EFAULT; | ||
758 | goto out; | ||
759 | } | ||
760 | ubytes -= uchunk; | ||
761 | maddr += mchunk; | ||
762 | if (image->file_mode) | ||
763 | kbuf += mchunk; | ||
764 | else | ||
765 | buf += mchunk; | ||
766 | mbytes -= mchunk; | ||
767 | } | ||
768 | out: | ||
769 | return result; | ||
770 | } | ||
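How the two byte counters interact, on example numbers (illustrative): for a segment with memsz = 3 pages and bufsz = 1.5 pages, the first iteration copies a whole page (uchunk == mchunk == PAGE_SIZE), the second copies half a page of payload into a freshly cleared page, and on the third ubytes is already 0, so uchunk is 0 and the page keeps the zeros from clear_page(); any memsz beyond bufsz is thus implicit zero-fill.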
771 | |||
772 | static int kimage_load_crash_segment(struct kimage *image, | ||
773 | struct kexec_segment *segment) | ||
774 | { | ||
775 | /* For crash dump kernels we simply copy the data from | ||
776 | * user space to its destination. | ||
777 | * We do things a page at a time for the sake of kmap. | ||
778 | */ | ||
779 | unsigned long maddr; | ||
780 | size_t ubytes, mbytes; | ||
781 | int result; | ||
782 | unsigned char __user *buf = NULL; | ||
783 | unsigned char *kbuf = NULL; | ||
784 | |||
785 | result = 0; | ||
786 | if (image->file_mode) | ||
787 | kbuf = segment->kbuf; | ||
788 | else | ||
789 | buf = segment->buf; | ||
790 | ubytes = segment->bufsz; | ||
791 | mbytes = segment->memsz; | ||
792 | maddr = segment->mem; | ||
793 | while (mbytes) { | ||
794 | struct page *page; | ||
795 | char *ptr; | ||
796 | size_t uchunk, mchunk; | ||
797 | |||
798 | page = pfn_to_page(maddr >> PAGE_SHIFT); | ||
799 | if (!page) { | ||
800 | result = -ENOMEM; | ||
801 | goto out; | ||
802 | } | ||
803 | ptr = kmap(page); | ||
804 | ptr += maddr & ~PAGE_MASK; | ||
805 | mchunk = min_t(size_t, mbytes, | ||
806 | PAGE_SIZE - (maddr & ~PAGE_MASK)); | ||
807 | uchunk = min(ubytes, mchunk); | ||
808 | if (mchunk > uchunk) { | ||
809 | /* Zero the trailing part of the page */ | ||
810 | memset(ptr + uchunk, 0, mchunk - uchunk); | ||
811 | } | ||
812 | |||
813 | /* For file based kexec, source pages are in kernel memory */ | ||
814 | if (image->file_mode) | ||
815 | memcpy(ptr, kbuf, uchunk); | ||
816 | else | ||
817 | result = copy_from_user(ptr, buf, uchunk); | ||
818 | kexec_flush_icache_page(page); | ||
819 | kunmap(page); | ||
820 | if (result) { | ||
821 | result = -EFAULT; | ||
822 | goto out; | ||
823 | } | ||
824 | ubytes -= uchunk; | ||
825 | maddr += mchunk; | ||
826 | if (image->file_mode) | ||
827 | kbuf += mchunk; | ||
828 | else | ||
829 | buf += mchunk; | ||
830 | mbytes -= mchunk; | ||
831 | } | ||
832 | out: | ||
833 | return result; | ||
834 | } | ||
835 | |||
836 | int kimage_load_segment(struct kimage *image, | ||
837 | struct kexec_segment *segment) | ||
838 | { | ||
839 | int result = -ENOMEM; | ||
840 | |||
841 | switch (image->type) { | ||
842 | case KEXEC_TYPE_DEFAULT: | ||
843 | result = kimage_load_normal_segment(image, segment); | ||
844 | break; | ||
845 | case KEXEC_TYPE_CRASH: | ||
846 | result = kimage_load_crash_segment(image, segment); | ||
847 | break; | ||
848 | } | ||
849 | |||
850 | return result; | ||
851 | } | ||
852 | |||
853 | struct kimage *kexec_image; | ||
854 | struct kimage *kexec_crash_image; | ||
855 | int kexec_load_disabled; | ||
856 | |||
857 | void crash_kexec(struct pt_regs *regs) | ||
858 | { | ||
859 | /* Take the kexec_mutex here to prevent sys_kexec_load | ||
860 | * running on one cpu from replacing the crash kernel | ||
861 | * we are using after a panic on a different cpu. | ||
862 | * | ||
863 | * If the crash kernel was not located in a fixed area | ||
864 | * of memory the xchg(&kexec_crash_image) would be | ||
865 | * sufficient. But since I reuse the memory... | ||
866 | */ | ||
867 | if (mutex_trylock(&kexec_mutex)) { | ||
868 | if (kexec_crash_image) { | ||
869 | struct pt_regs fixed_regs; | ||
870 | |||
871 | crash_setup_regs(&fixed_regs, regs); | ||
872 | crash_save_vmcoreinfo(); | ||
873 | machine_crash_shutdown(&fixed_regs); | ||
874 | machine_kexec(kexec_crash_image); | ||
875 | } | ||
876 | mutex_unlock(&kexec_mutex); | ||
877 | } | ||
878 | } | ||
879 | |||
880 | size_t crash_get_memory_size(void) | ||
881 | { | ||
882 | size_t size = 0; | ||
883 | |||
884 | mutex_lock(&kexec_mutex); | ||
885 | if (crashk_res.end != crashk_res.start) | ||
886 | size = resource_size(&crashk_res); | ||
887 | mutex_unlock(&kexec_mutex); | ||
888 | return size; | ||
889 | } | ||
890 | |||
891 | void __weak crash_free_reserved_phys_range(unsigned long begin, | ||
892 | unsigned long end) | ||
893 | { | ||
894 | unsigned long addr; | ||
895 | |||
896 | for (addr = begin; addr < end; addr += PAGE_SIZE) | ||
897 | free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); | ||
898 | } | ||
899 | |||
900 | int crash_shrink_memory(unsigned long new_size) | ||
901 | { | ||
902 | int ret = 0; | ||
903 | unsigned long start, end; | ||
904 | unsigned long old_size; | ||
905 | struct resource *ram_res; | ||
906 | |||
907 | mutex_lock(&kexec_mutex); | ||
908 | |||
909 | if (kexec_crash_image) { | ||
910 | ret = -ENOENT; | ||
911 | goto unlock; | ||
912 | } | ||
913 | start = crashk_res.start; | ||
914 | end = crashk_res.end; | ||
915 | old_size = (end == 0) ? 0 : end - start + 1; | ||
916 | if (new_size >= old_size) { | ||
917 | ret = (new_size == old_size) ? 0 : -EINVAL; | ||
918 | goto unlock; | ||
919 | } | ||
920 | |||
921 | ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); | ||
922 | if (!ram_res) { | ||
923 | ret = -ENOMEM; | ||
924 | goto unlock; | ||
925 | } | ||
926 | |||
927 | start = roundup(start, KEXEC_CRASH_MEM_ALIGN); | ||
928 | end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); | ||
929 | |||
930 | crash_map_reserved_pages(); | ||
931 | crash_free_reserved_phys_range(end, crashk_res.end); | ||
932 | |||
933 | if ((start == end) && (crashk_res.parent != NULL)) | ||
934 | release_resource(&crashk_res); | ||
935 | |||
936 | ram_res->start = end; | ||
937 | ram_res->end = crashk_res.end; | ||
938 | ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; | ||
939 | ram_res->name = "System RAM"; | ||
940 | |||
941 | crashk_res.end = end - 1; | ||
942 | |||
943 | insert_resource(&iomem_resource, ram_res); | ||
944 | crash_unmap_reserved_pages(); | ||
945 | |||
946 | unlock: | ||
947 | mutex_unlock(&kexec_mutex); | ||
948 | return ret; | ||
949 | } | ||
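Userspace reaches crash_shrink_memory() through the kexec_crash_size sysfs attribute, which the kernel/ksysfs.c hunk later in this patch keeps behind CONFIG_KEXEC_CORE. The store handler is essentially the following sketch, modeled on kernel/ksysfs.c:

    /* Sketch: parse the requested reservation size from sysfs and
     * let crash_shrink_memory() release the unneeded tail back to
     * System RAM. */
    static ssize_t kexec_crash_size_store(struct kobject *kobj,
                                          struct kobj_attribute *attr,
                                          const char *buf, size_t count)
    {
            unsigned long cnt;
            int ret;

            if (kstrtoul(buf, 0, &cnt))
                    return -EINVAL;

            ret = crash_shrink_memory(cnt);
            return ret < 0 ? ret : count;
    }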
950 | |||
951 | static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, | ||
952 | size_t data_len) | ||
953 | { | ||
954 | struct elf_note note; | ||
955 | |||
956 | note.n_namesz = strlen(name) + 1; | ||
957 | note.n_descsz = data_len; | ||
958 | note.n_type = type; | ||
959 | memcpy(buf, ¬e, sizeof(note)); | ||
960 | buf += (sizeof(note) + 3)/4; | ||
961 | memcpy(buf, name, note.n_namesz); | ||
962 | buf += (note.n_namesz + 3)/4; | ||
963 | memcpy(buf, data, note.n_descsz); | ||
964 | buf += (note.n_descsz + 3)/4; | ||
965 | |||
966 | return buf; | ||
967 | } | ||
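The (x + 3)/4 arithmetic rounds each variable-length field up to the next 32-bit word, as the ELF note format requires. As an illustration, appending a note named "CORE" (5 bytes including the NUL) with a 12-byte descriptor advances buf like this:

    header (n_namesz, n_descsz, n_type), 12 bytes  -> buf += 3 words
    "CORE\0" padded out to 8 bytes                 -> buf += 2 words
    12-byte descriptor (already word-aligned)      -> buf += 3 words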
968 | |||
969 | static void final_note(u32 *buf) | ||
970 | { | ||
971 | struct elf_note note; | ||
972 | |||
973 | note.n_namesz = 0; | ||
974 | note.n_descsz = 0; | ||
975 | note.n_type = 0; | ||
976 | memcpy(buf, ¬e, sizeof(note)); | ||
977 | } | ||
978 | |||
979 | void crash_save_cpu(struct pt_regs *regs, int cpu) | ||
980 | { | ||
981 | struct elf_prstatus prstatus; | ||
982 | u32 *buf; | ||
983 | |||
984 | if ((cpu < 0) || (cpu >= nr_cpu_ids)) | ||
985 | return; | ||
986 | |||
987 | /* Using ELF notes here is opportunistic. | ||
988 | * I need a well-defined structure format | ||
989 | * for the data I pass, and I need tags | ||
990 | * on the data to indicate what information I have | ||
991 | * squirrelled away. ELF notes happen to provide | ||
992 | * all of that, so there is no need to invent something new. | ||
993 | */ | ||
994 | buf = (u32 *)per_cpu_ptr(crash_notes, cpu); | ||
995 | if (!buf) | ||
996 | return; | ||
997 | memset(&prstatus, 0, sizeof(prstatus)); | ||
998 | prstatus.pr_pid = current->pid; | ||
999 | elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); | ||
1000 | buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, | ||
1001 | &prstatus, sizeof(prstatus)); | ||
1002 | final_note(buf); | ||
1003 | } | ||
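Architecture crash-shutdown code typically calls crash_save_cpu() once on every processor, usually from the IPI that stops the secondary CPUs, so each CPU records its own registers into its per-cpu note. A sketch of such a handler (names illustrative):

    /* Sketch: each CPU saves its registers into its crash note
     * before parking; regs comes from the stop-IPI exception frame. */
    static void crash_stop_ipi(struct pt_regs *regs)
    {
            crash_save_cpu(regs, smp_processor_id());
            /* ... then disable this CPU and spin ... */
    }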
1004 | |||
1005 | static int __init crash_notes_memory_init(void) | ||
1006 | { | ||
1007 | /* Allocate memory for saving cpu registers. */ | ||
1008 | crash_notes = alloc_percpu(note_buf_t); | ||
1009 | if (!crash_notes) { | ||
1010 | pr_warn("Kexec: Memory allocation for saving cpu register states failed\n"); | ||
1011 | return -ENOMEM; | ||
1012 | } | ||
1013 | return 0; | ||
1014 | } | ||
1015 | subsys_initcall(crash_notes_memory_init); | ||
1016 | |||
1017 | |||
1018 | /* | ||
1019 | * Parsing of the "crashkernel" command line. | ||
1020 | * | ||
1021 | * This code is intended to be called from architecture-specific code. | ||
1022 | */ | ||
1023 | |||
1024 | |||
1025 | /* | ||
1026 | * This function parses command lines in the format | ||
1027 | * | ||
1028 | * crashkernel=ramsize-range:size[,...][@offset] | ||
1029 | * | ||
1030 | * The function returns 0 on success and -EINVAL on failure. | ||
1031 | */ | ||
1032 | static int __init parse_crashkernel_mem(char *cmdline, | ||
1033 | unsigned long long system_ram, | ||
1034 | unsigned long long *crash_size, | ||
1035 | unsigned long long *crash_base) | ||
1036 | { | ||
1037 | char *cur = cmdline, *tmp; | ||
1038 | |||
1039 | /* for each entry of the comma-separated list */ | ||
1040 | do { | ||
1041 | unsigned long long start, end = ULLONG_MAX, size; | ||
1042 | |||
1043 | /* get the start of the range */ | ||
1044 | start = memparse(cur, &tmp); | ||
1045 | if (cur == tmp) { | ||
1046 | pr_warn("crashkernel: Memory value expected\n"); | ||
1047 | return -EINVAL; | ||
1048 | } | ||
1049 | cur = tmp; | ||
1050 | if (*cur != '-') { | ||
1051 | pr_warn("crashkernel: '-' expected\n"); | ||
1052 | return -EINVAL; | ||
1053 | } | ||
1054 | cur++; | ||
1055 | |||
1056 | /* if no ':' is here, then we read the end of the range */ | ||
1057 | if (*cur != ':') { | ||
1058 | end = memparse(cur, &tmp); | ||
1059 | if (cur == tmp) { | ||
1060 | pr_warn("crashkernel: Memory value expected\n"); | ||
1061 | return -EINVAL; | ||
1062 | } | ||
1063 | cur = tmp; | ||
1064 | if (end <= start) { | ||
1065 | pr_warn("crashkernel: end <= start\n"); | ||
1066 | return -EINVAL; | ||
1067 | } | ||
1068 | } | ||
1069 | |||
1070 | if (*cur != ':') { | ||
1071 | pr_warn("crashkernel: ':' expected\n"); | ||
1072 | return -EINVAL; | ||
1073 | } | ||
1074 | cur++; | ||
1075 | |||
1076 | size = memparse(cur, &tmp); | ||
1077 | if (cur == tmp) { | ||
1078 | pr_warn("Memory value expected\n"); | ||
1079 | return -EINVAL; | ||
1080 | } | ||
1081 | cur = tmp; | ||
1082 | if (size >= system_ram) { | ||
1083 | pr_warn("crashkernel: invalid size\n"); | ||
1084 | return -EINVAL; | ||
1085 | } | ||
1086 | |||
1087 | /* does the system RAM size fall into this range? */ | ||
1088 | if (system_ram >= start && system_ram < end) { | ||
1089 | *crash_size = size; | ||
1090 | break; | ||
1091 | } | ||
1092 | } while (*cur++ == ','); | ||
1093 | |||
1094 | if (*crash_size > 0) { | ||
1095 | while (*cur && *cur != ' ' && *cur != '@') | ||
1096 | cur++; | ||
1097 | if (*cur == '@') { | ||
1098 | cur++; | ||
1099 | *crash_base = memparse(cur, &tmp); | ||
1100 | if (cur == tmp) { | ||
1101 | pr_warn("Memory value expected after '@'\n"); | ||
1102 | return -EINVAL; | ||
1103 | } | ||
1104 | } | ||
1105 | } | ||
1106 | |||
1107 | return 0; | ||
1108 | } | ||
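A worked example: with "crashkernel=512M-2G:64M,2G-:128M" on a 4 GiB machine, the loop rejects the first range (4 GiB does not fall in [512M, 2G)) and matches the open-ended second one, so *crash_size becomes 128M; a trailing "@16M" after the list would additionally set *crash_base.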
1109 | |||
1110 | /* | ||
1111 | * This function parses "simple" (old) crashkernel command lines like | ||
1112 | * | ||
1113 | * crashkernel=size[@offset] | ||
1114 | * | ||
1115 | * It returns 0 on success and -EINVAL on failure. | ||
1116 | */ | ||
1117 | static int __init parse_crashkernel_simple(char *cmdline, | ||
1118 | unsigned long long *crash_size, | ||
1119 | unsigned long long *crash_base) | ||
1120 | { | ||
1121 | char *cur = cmdline; | ||
1122 | |||
1123 | *crash_size = memparse(cmdline, &cur); | ||
1124 | if (cmdline == cur) { | ||
1125 | pr_warn("crashkernel: memory value expected\n"); | ||
1126 | return -EINVAL; | ||
1127 | } | ||
1128 | |||
1129 | if (*cur == '@') | ||
1130 | *crash_base = memparse(cur+1, &cur); | ||
1131 | else if (*cur != ' ' && *cur != '\0') { | ||
1132 | pr_warn("crashkernel: unrecognized char\n"); | ||
1133 | return -EINVAL; | ||
1134 | } | ||
1135 | |||
1136 | return 0; | ||
1137 | } | ||
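For example, "crashkernel=128M@16M" yields *crash_size = 128M and *crash_base = 16M, while a plain "crashkernel=128M" leaves *crash_base at its caller-initialized 0, which the architectures treat as "place the reservation anywhere suitable".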
1138 | |||
1139 | #define SUFFIX_HIGH 0 | ||
1140 | #define SUFFIX_LOW 1 | ||
1141 | #define SUFFIX_NULL 2 | ||
1142 | static __initdata char *suffix_tbl[] = { | ||
1143 | [SUFFIX_HIGH] = ",high", | ||
1144 | [SUFFIX_LOW] = ",low", | ||
1145 | [SUFFIX_NULL] = NULL, | ||
1146 | }; | ||
1147 | |||
1148 | /* | ||
1149 | * This function parses "suffix" crashkernel command lines like | ||
1150 | * | ||
1151 | * crashkernel=size,[high|low] | ||
1152 | * | ||
1153 | * It returns 0 on success and -EINVAL on failure. | ||
1154 | */ | ||
1155 | static int __init parse_crashkernel_suffix(char *cmdline, | ||
1156 | unsigned long long *crash_size, | ||
1157 | const char *suffix) | ||
1158 | { | ||
1159 | char *cur = cmdline; | ||
1160 | |||
1161 | *crash_size = memparse(cmdline, &cur); | ||
1162 | if (cmdline == cur) { | ||
1163 | pr_warn("crashkernel: memory value expected\n"); | ||
1164 | return -EINVAL; | ||
1165 | } | ||
1166 | |||
1167 | /* check that the expected suffix follows the size */ | ||
1168 | if (strncmp(cur, suffix, strlen(suffix))) { | ||
1169 | pr_warn("crashkernel: unrecognized char\n"); | ||
1170 | return -EINVAL; | ||
1171 | } | ||
1172 | cur += strlen(suffix); | ||
1173 | if (*cur != ' ' && *cur != '\0') { | ||
1174 | pr_warn("crashkernel: unrecognized char\n"); | ||
1175 | return -EINVAL; | ||
1176 | } | ||
1177 | |||
1178 | return 0; | ||
1179 | } | ||
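For example, parsing "crashkernel=256M,high" with suffix == ",high" sets *crash_size to 256M. Note that this form deliberately accepts no "@offset": the architecture decides where the high (or low) reservation lands.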
1180 | |||
1181 | static __init char *get_last_crashkernel(char *cmdline, | ||
1182 | const char *name, | ||
1183 | const char *suffix) | ||
1184 | { | ||
1185 | char *p = cmdline, *ck_cmdline = NULL; | ||
1186 | |||
1187 | /* find crashkernel and use the last one if there is more than one */ | ||
1188 | p = strstr(p, name); | ||
1189 | while (p) { | ||
1190 | char *end_p = strchr(p, ' '); | ||
1191 | char *q; | ||
1192 | |||
1193 | if (!end_p) | ||
1194 | end_p = p + strlen(p); | ||
1195 | |||
1196 | if (!suffix) { | ||
1197 | int i; | ||
1198 | |||
1199 | /* skip entries that end in any known suffix */ | ||
1200 | for (i = 0; suffix_tbl[i]; i++) { | ||
1201 | q = end_p - strlen(suffix_tbl[i]); | ||
1202 | if (!strncmp(q, suffix_tbl[i], | ||
1203 | strlen(suffix_tbl[i]))) | ||
1204 | goto next; | ||
1205 | } | ||
1206 | ck_cmdline = p; | ||
1207 | } else { | ||
1208 | q = end_p - strlen(suffix); | ||
1209 | if (!strncmp(q, suffix, strlen(suffix))) | ||
1210 | ck_cmdline = p; | ||
1211 | } | ||
1212 | next: | ||
1213 | p = strstr(p+1, name); | ||
1214 | } | ||
1215 | |||
1216 | /* NULL if no usable crashkernel= entry was found */ | ||
1217 | return ck_cmdline; | ||
1220 | } | ||
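For example, given the command line "crashkernel=64M crashkernel=128M,high crashkernel=256M", a call with suffix == NULL skips the ",high" entry and returns a pointer to the final "crashkernel=256M", while a call with suffix == ",high" returns the middle entry.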
1221 | |||
1222 | static int __init __parse_crashkernel(char *cmdline, | ||
1223 | unsigned long long system_ram, | ||
1224 | unsigned long long *crash_size, | ||
1225 | unsigned long long *crash_base, | ||
1226 | const char *name, | ||
1227 | const char *suffix) | ||
1228 | { | ||
1229 | char *first_colon, *first_space; | ||
1230 | char *ck_cmdline; | ||
1231 | |||
1232 | BUG_ON(!crash_size || !crash_base); | ||
1233 | *crash_size = 0; | ||
1234 | *crash_base = 0; | ||
1235 | |||
1236 | ck_cmdline = get_last_crashkernel(cmdline, name, suffix); | ||
1237 | |||
1238 | if (!ck_cmdline) | ||
1239 | return -EINVAL; | ||
1240 | |||
1241 | ck_cmdline += strlen(name); | ||
1242 | |||
1243 | if (suffix) | ||
1244 | return parse_crashkernel_suffix(ck_cmdline, crash_size, | ||
1245 | suffix); | ||
1246 | /* | ||
1247 | * if the command line contains a ':', then that's the extended | ||
1248 | * syntax -- if not, it must be the classic syntax | ||
1249 | */ | ||
1250 | first_colon = strchr(ck_cmdline, ':'); | ||
1251 | first_space = strchr(ck_cmdline, ' '); | ||
1252 | if (first_colon && (!first_space || first_colon < first_space)) | ||
1253 | return parse_crashkernel_mem(ck_cmdline, system_ram, | ||
1254 | crash_size, crash_base); | ||
1255 | |||
1256 | return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); | ||
1257 | } | ||
1258 | |||
1259 | /* | ||
1260 | * This function is the entry point for command line parsing and should be | ||
1261 | * called from the arch-specific code. | ||
1262 | */ | ||
1263 | int __init parse_crashkernel(char *cmdline, | ||
1264 | unsigned long long system_ram, | ||
1265 | unsigned long long *crash_size, | ||
1266 | unsigned long long *crash_base) | ||
1267 | { | ||
1268 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1269 | "crashkernel=", NULL); | ||
1270 | } | ||
1271 | |||
1272 | int __init parse_crashkernel_high(char *cmdline, | ||
1273 | unsigned long long system_ram, | ||
1274 | unsigned long long *crash_size, | ||
1275 | unsigned long long *crash_base) | ||
1276 | { | ||
1277 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1278 | "crashkernel=", suffix_tbl[SUFFIX_HIGH]); | ||
1279 | } | ||
1280 | |||
1281 | int __init parse_crashkernel_low(char *cmdline, | ||
1282 | unsigned long long system_ram, | ||
1283 | unsigned long long *crash_size, | ||
1284 | unsigned long long *crash_base) | ||
1285 | { | ||
1286 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1287 | "crashkernel=", suffix_tbl[SUFFIX_LOW]); | ||
1288 | } | ||
1289 | |||
1290 | static void update_vmcoreinfo_note(void) | ||
1291 | { | ||
1292 | u32 *buf = vmcoreinfo_note; | ||
1293 | |||
1294 | if (!vmcoreinfo_size) | ||
1295 | return; | ||
1296 | buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, | ||
1297 | vmcoreinfo_size); | ||
1298 | final_note(buf); | ||
1299 | } | ||
1300 | |||
1301 | void crash_save_vmcoreinfo(void) | ||
1302 | { | ||
1303 | vmcoreinfo_append_str("CRASHTIME=%lu\n", get_seconds()); | ||
1304 | update_vmcoreinfo_note(); | ||
1305 | } | ||
1306 | |||
1307 | void vmcoreinfo_append_str(const char *fmt, ...) | ||
1308 | { | ||
1309 | va_list args; | ||
1310 | char buf[0x50]; | ||
1311 | size_t r; | ||
1312 | |||
1313 | va_start(args, fmt); | ||
1314 | r = vscnprintf(buf, sizeof(buf), fmt, args); | ||
1315 | va_end(args); | ||
1316 | |||
1317 | r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); | ||
1318 | |||
1319 | memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); | ||
1320 | |||
1321 | vmcoreinfo_size += r; | ||
1322 | } | ||
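The VMCOREINFO_* macros used by the initcall below are thin wrappers around this function; in include/linux/kexec.h they expand to roughly the following (approximate -- see the header for the authoritative definitions):

    /* Approximate expansions of the vmcoreinfo helper macros. */
    #define VMCOREINFO_SYMBOL(name) \
            vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, \
                                  (unsigned long)&name)
    #define VMCOREINFO_NUMBER(name) \
            vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
    #define VMCOREINFO_OFFSET(name, field) \
            vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
                                  (unsigned long)offsetof(struct name, field))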
1323 | |||
1324 | /* | ||
1325 | * provide an empty default implementation here -- architecture | ||
1326 | * code may override this | ||
1327 | */ | ||
1328 | void __weak arch_crash_save_vmcoreinfo(void) | ||
1329 | {} | ||
1330 | |||
1331 | unsigned long __weak paddr_vmcoreinfo_note(void) | ||
1332 | { | ||
1333 | return __pa((unsigned long)(char *)&vmcoreinfo_note); | ||
1334 | } | ||
1335 | |||
1336 | static int __init crash_save_vmcoreinfo_init(void) | ||
1337 | { | ||
1338 | VMCOREINFO_OSRELEASE(init_uts_ns.name.release); | ||
1339 | VMCOREINFO_PAGESIZE(PAGE_SIZE); | ||
1340 | |||
1341 | VMCOREINFO_SYMBOL(init_uts_ns); | ||
1342 | VMCOREINFO_SYMBOL(node_online_map); | ||
1343 | #ifdef CONFIG_MMU | ||
1344 | VMCOREINFO_SYMBOL(swapper_pg_dir); | ||
1345 | #endif | ||
1346 | VMCOREINFO_SYMBOL(_stext); | ||
1347 | VMCOREINFO_SYMBOL(vmap_area_list); | ||
1348 | |||
1349 | #ifndef CONFIG_NEED_MULTIPLE_NODES | ||
1350 | VMCOREINFO_SYMBOL(mem_map); | ||
1351 | VMCOREINFO_SYMBOL(contig_page_data); | ||
1352 | #endif | ||
1353 | #ifdef CONFIG_SPARSEMEM | ||
1354 | VMCOREINFO_SYMBOL(mem_section); | ||
1355 | VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); | ||
1356 | VMCOREINFO_STRUCT_SIZE(mem_section); | ||
1357 | VMCOREINFO_OFFSET(mem_section, section_mem_map); | ||
1358 | #endif | ||
1359 | VMCOREINFO_STRUCT_SIZE(page); | ||
1360 | VMCOREINFO_STRUCT_SIZE(pglist_data); | ||
1361 | VMCOREINFO_STRUCT_SIZE(zone); | ||
1362 | VMCOREINFO_STRUCT_SIZE(free_area); | ||
1363 | VMCOREINFO_STRUCT_SIZE(list_head); | ||
1364 | VMCOREINFO_SIZE(nodemask_t); | ||
1365 | VMCOREINFO_OFFSET(page, flags); | ||
1366 | VMCOREINFO_OFFSET(page, _count); | ||
1367 | VMCOREINFO_OFFSET(page, mapping); | ||
1368 | VMCOREINFO_OFFSET(page, lru); | ||
1369 | VMCOREINFO_OFFSET(page, _mapcount); | ||
1370 | VMCOREINFO_OFFSET(page, private); | ||
1371 | VMCOREINFO_OFFSET(pglist_data, node_zones); | ||
1372 | VMCOREINFO_OFFSET(pglist_data, nr_zones); | ||
1373 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
1374 | VMCOREINFO_OFFSET(pglist_data, node_mem_map); | ||
1375 | #endif | ||
1376 | VMCOREINFO_OFFSET(pglist_data, node_start_pfn); | ||
1377 | VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); | ||
1378 | VMCOREINFO_OFFSET(pglist_data, node_id); | ||
1379 | VMCOREINFO_OFFSET(zone, free_area); | ||
1380 | VMCOREINFO_OFFSET(zone, vm_stat); | ||
1381 | VMCOREINFO_OFFSET(zone, spanned_pages); | ||
1382 | VMCOREINFO_OFFSET(free_area, free_list); | ||
1383 | VMCOREINFO_OFFSET(list_head, next); | ||
1384 | VMCOREINFO_OFFSET(list_head, prev); | ||
1385 | VMCOREINFO_OFFSET(vmap_area, va_start); | ||
1386 | VMCOREINFO_OFFSET(vmap_area, list); | ||
1387 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); | ||
1388 | log_buf_kexec_setup(); | ||
1389 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); | ||
1390 | VMCOREINFO_NUMBER(NR_FREE_PAGES); | ||
1391 | VMCOREINFO_NUMBER(PG_lru); | ||
1392 | VMCOREINFO_NUMBER(PG_private); | ||
1393 | VMCOREINFO_NUMBER(PG_swapcache); | ||
1394 | VMCOREINFO_NUMBER(PG_slab); | ||
1395 | #ifdef CONFIG_MEMORY_FAILURE | ||
1396 | VMCOREINFO_NUMBER(PG_hwpoison); | ||
1397 | #endif | ||
1398 | VMCOREINFO_NUMBER(PG_head_mask); | ||
1399 | VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); | ||
1400 | #ifdef CONFIG_HUGETLBFS | ||
1401 | VMCOREINFO_SYMBOL(free_huge_page); | ||
1402 | #endif | ||
1403 | |||
1404 | arch_crash_save_vmcoreinfo(); | ||
1405 | update_vmcoreinfo_note(); | ||
1406 | |||
1407 | return 0; | ||
1408 | } | ||
1409 | |||
1410 | subsys_initcall(crash_save_vmcoreinfo_init); | ||
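The resulting vmcoreinfo note body is plain "KEY=value" text that tools such as makedumpfile and crash parse out of the dump. On a typical build it begins roughly like this (values illustrative):

    OSRELEASE=4.2.0
    PAGESIZE=4096
    SYMBOL(init_uts_ns)=ffffffff81a44560
    SYMBOL(node_online_map)=ffffffff81a3e828
    SIZE(page)=64
    OFFSET(page.flags)=0
    NUMBER(NR_FREE_PAGES)=0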
1411 | |||
1412 | /* | ||
1413 | * Move into place and start executing a preloaded standalone | ||
1414 | * executable. If nothing was preloaded, return an error. | ||
1415 | */ | ||
1416 | int kernel_kexec(void) | ||
1417 | { | ||
1418 | int error = 0; | ||
1419 | |||
1420 | if (!mutex_trylock(&kexec_mutex)) | ||
1421 | return -EBUSY; | ||
1422 | if (!kexec_image) { | ||
1423 | error = -EINVAL; | ||
1424 | goto Unlock; | ||
1425 | } | ||
1426 | |||
1427 | #ifdef CONFIG_KEXEC_JUMP | ||
1428 | if (kexec_image->preserve_context) { | ||
1429 | lock_system_sleep(); | ||
1430 | pm_prepare_console(); | ||
1431 | error = freeze_processes(); | ||
1432 | if (error) { | ||
1433 | error = -EBUSY; | ||
1434 | goto Restore_console; | ||
1435 | } | ||
1436 | suspend_console(); | ||
1437 | error = dpm_suspend_start(PMSG_FREEZE); | ||
1438 | if (error) | ||
1439 | goto Resume_console; | ||
1440 | /* At this point, dpm_suspend_start() has been called, | ||
1441 | * but *not* dpm_suspend_end(). We *must* call | ||
1442 | * dpm_suspend_end() now. Otherwise, drivers for | ||
1443 | * some devices (e.g. interrupt controllers) become | ||
1444 | * desynchronized with the actual state of the | ||
1445 | * hardware at resume time, and evil weirdness ensues. | ||
1446 | */ | ||
1447 | error = dpm_suspend_end(PMSG_FREEZE); | ||
1448 | if (error) | ||
1449 | goto Resume_devices; | ||
1450 | error = disable_nonboot_cpus(); | ||
1451 | if (error) | ||
1452 | goto Enable_cpus; | ||
1453 | local_irq_disable(); | ||
1454 | error = syscore_suspend(); | ||
1455 | if (error) | ||
1456 | goto Enable_irqs; | ||
1457 | } else | ||
1458 | #endif | ||
1459 | { | ||
1460 | kexec_in_progress = true; | ||
1461 | kernel_restart_prepare(NULL); | ||
1462 | migrate_to_reboot_cpu(); | ||
1463 | |||
1464 | /* | ||
1465 | * migrate_to_reboot_cpu() disables CPU hotplug assuming that | ||
1466 | * no further code needs to use CPU hotplug (which is true in | ||
1467 | * the reboot case). However, the kexec path depends on using | ||
1468 | * CPU hotplug again; so re-enable it here. | ||
1469 | */ | ||
1470 | cpu_hotplug_enable(); | ||
1471 | pr_emerg("Starting new kernel\n"); | ||
1472 | machine_shutdown(); | ||
1473 | } | ||
1474 | |||
1475 | machine_kexec(kexec_image); | ||
1476 | |||
1477 | #ifdef CONFIG_KEXEC_JUMP | ||
1478 | if (kexec_image->preserve_context) { | ||
1479 | syscore_resume(); | ||
1480 | Enable_irqs: | ||
1481 | local_irq_enable(); | ||
1482 | Enable_cpus: | ||
1483 | enable_nonboot_cpus(); | ||
1484 | dpm_resume_start(PMSG_RESTORE); | ||
1485 | Resume_devices: | ||
1486 | dpm_resume_end(PMSG_RESTORE); | ||
1487 | Resume_console: | ||
1488 | resume_console(); | ||
1489 | thaw_processes(); | ||
1490 | Restore_console: | ||
1491 | pm_restore_console(); | ||
1492 | unlock_system_sleep(); | ||
1493 | } | ||
1494 | #endif | ||
1495 | |||
1496 | Unlock: | ||
1497 | mutex_unlock(&kexec_mutex); | ||
1498 | return error; | ||
1499 | } | ||
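Userspace reaches kernel_kexec() via reboot(2) with LINUX_REBOOT_CMD_KEXEC, which the kernel/reboot.c hunk below now gates on CONFIG_KEXEC_CORE. A minimal trigger, assuming an image was already staged with kexec_load(2) or kexec_file_load(2):

    /* Minimal userspace trigger for an already-loaded kexec image. */
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/reboot.h>

    int main(void)
    {
            return syscall(SYS_reboot, LINUX_REBOOT_MAGIC1,
                           LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC,
                           NULL);
    }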
1500 | |||
1501 | /* | ||
1502 | * Add and remove page tables for crashkernel memory | ||
1503 | * | ||
1504 | * Provide an empty default implementation here -- architecture | ||
1505 | * code may override this | ||
1506 | */ | ||
1507 | void __weak crash_map_reserved_pages(void) | ||
1508 | {} | ||
1509 | |||
1510 | void __weak crash_unmap_reserved_pages(void) | ||
1511 | {} | ||
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 6683ccef9fff..e83b26464061 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
@@ -90,7 +90,7 @@ static ssize_t profiling_store(struct kobject *kobj, | |||
90 | KERNEL_ATTR_RW(profiling); | 90 | KERNEL_ATTR_RW(profiling); |
91 | #endif | 91 | #endif |
92 | 92 | ||
93 | #ifdef CONFIG_KEXEC | 93 | #ifdef CONFIG_KEXEC_CORE |
94 | static ssize_t kexec_loaded_show(struct kobject *kobj, | 94 | static ssize_t kexec_loaded_show(struct kobject *kobj, |
95 | struct kobj_attribute *attr, char *buf) | 95 | struct kobj_attribute *attr, char *buf) |
96 | { | 96 | { |
@@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj, | |||
134 | } | 134 | } |
135 | KERNEL_ATTR_RO(vmcoreinfo); | 135 | KERNEL_ATTR_RO(vmcoreinfo); |
136 | 136 | ||
137 | #endif /* CONFIG_KEXEC */ | 137 | #endif /* CONFIG_KEXEC_CORE */ |
138 | 138 | ||
139 | /* whether file capabilities are enabled */ | 139 | /* whether file capabilities are enabled */ |
140 | static ssize_t fscaps_show(struct kobject *kobj, | 140 | static ssize_t fscaps_show(struct kobject *kobj, |
@@ -196,7 +196,7 @@ static struct attribute * kernel_attrs[] = { | |||
196 | #ifdef CONFIG_PROFILING | 196 | #ifdef CONFIG_PROFILING |
197 | &profiling_attr.attr, | 197 | &profiling_attr.attr, |
198 | #endif | 198 | #endif |
199 | #ifdef CONFIG_KEXEC | 199 | #ifdef CONFIG_KEXEC_CORE |
200 | &kexec_loaded_attr.attr, | 200 | &kexec_loaded_attr.attr, |
201 | &kexec_crash_loaded_attr.attr, | 201 | &kexec_crash_loaded_attr.attr, |
202 | &kexec_crash_size_attr.attr, | 202 | &kexec_crash_size_attr.attr, |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index cf8c24203368..8f0324ef72ab 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
@@ -835,7 +835,7 @@ const struct file_operations kmsg_fops = { | |||
835 | .release = devkmsg_release, | 835 | .release = devkmsg_release, |
836 | }; | 836 | }; |
837 | 837 | ||
838 | #ifdef CONFIG_KEXEC | 838 | #ifdef CONFIG_KEXEC_CORE |
839 | /* | 839 | /* |
840 | * This appends the listed symbols to /proc/vmcore | 840 | * This appends the listed symbols to /proc/vmcore |
841 | * | 841 | * |
diff --git a/kernel/reboot.c b/kernel/reboot.c index d20c85d9f8c0..bd30a973fe94 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c | |||
@@ -346,7 +346,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
346 | kernel_restart(buffer); | 346 | kernel_restart(buffer); |
347 | break; | 347 | break; |
348 | 348 | ||
349 | #ifdef CONFIG_KEXEC | 349 | #ifdef CONFIG_KEXEC_CORE |
350 | case LINUX_REBOOT_CMD_KEXEC: | 350 | case LINUX_REBOOT_CMD_KEXEC: |
351 | ret = kernel_kexec(); | 351 | ret = kernel_kexec(); |
352 | break; | 352 | break; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 19b62b522158..715cc57cc66a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -621,7 +621,7 @@ static struct ctl_table kern_table[] = { | |||
621 | .proc_handler = proc_dointvec, | 621 | .proc_handler = proc_dointvec, |
622 | }, | 622 | }, |
623 | #endif | 623 | #endif |
624 | #ifdef CONFIG_KEXEC | 624 | #ifdef CONFIG_KEXEC_CORE |
625 | { | 625 | { |
626 | .procname = "kexec_load_disabled", | 626 | .procname = "kexec_load_disabled", |
627 | .data = &kexec_load_disabled, | 627 | .data = &kexec_load_disabled, |