author	Huang Ying <ying.huang@intel.com>	2008-07-25 22:45:07 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-26 15:00:04 -0400
commit	3ab83521378268044a448113c6aa9a9e245f4d2f (patch)
tree	e9e8496577e4b2e994edf204e9a8ae7c026eec95
parent	7fccf0326536c1b245b98740d489abb9aab69a12 (diff)
kexec jump
This patch provides an enhancement to kexec/kdump. It implements the
following features:

- Backup/restore memory used by the original kernel before/after kexec.
- Save/restore CPU state before/after kexec.

The features of this patch can be used as a general method to call a
program in physical mode (paging turned off). This can be used to call
BIOS code under Linux.

kexec-tools needs to be patched to support kexec jump. The patches and
the precompiled kexec can be downloaded from the following URLs:

source: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-src_git_kh10.tar.bz2
patches: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-patches_git_kh10.tar.bz2
binary: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec_git_kh10

Usage example of calling some physical mode code and returning:

1. Compile and install the patched kernel with the following options
   selected:

   CONFIG_X86_32=y
   CONFIG_KEXEC=y
   CONFIG_PM=y
   CONFIG_KEXEC_JUMP=y

2. Build the patched kexec-tools, or download the pre-built one.

3. Build some physical mode executable, named e.g. "phy_mode".

4. Boot the kernel compiled in step 1.

5. Load the physical mode executable with /sbin/kexec. The shell
   command line can be as follows:

   /sbin/kexec --load-preserve-context --args-none phy_mode

6. Call the physical mode executable with the following shell command
   line:

   /sbin/kexec -e

Implementation notes:

To support jumping without reserving memory, one shadow backup page
(source page) is allocated for each page used by the kexeced code image
(destination page). During kexec_load, the image of the kexeced code is
loaded into the source pages; before executing, the destination pages
and the source pages are swapped, so the contents of the destination
pages are backed up. Before jumping to the kexeced code image and after
jumping back to the original kernel, the destination pages and the
source pages are swapped again.

The C ABI (calling convention) is used as the communication protocol
between the kernel and the called code.

A flag named KEXEC_PRESERVE_CONTEXT is added for sys_kexec_load to
indicate that the loaded kernel image is used for jumping back.

For now, only the i386 architecture is supported.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@nigel.suspend2.net>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
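The new flag is passed to sys_kexec_load() like the existing KEXEC_ON_CRASH
flag (see the kernel/kexec.c hunks below). A minimal, hypothetical sketch of
how a loader such as the patched kexec-tools might invoke the syscall from
user space; the flag value and syscall signature come from this patch, while
the image buffer, its load address, and the segment layout are illustrative
only:

    /* sketch only: phy_mode_image and its address are made up */
    #include <stddef.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #define KEXEC_PRESERVE_CONTEXT 0x00000002  /* include/linux/kexec.h */
    #define KEXEC_ARCH_DEFAULT     (0 << 16)

    struct kexec_segment {          /* mirrors the user-visible ABI */
            const void *buf;        /* user-space buffer */
            size_t bufsz;
            const void *mem;        /* physical destination */
            size_t memsz;
    };

    static char phy_mode_image[4096];   /* hypothetical physical mode code */

    int main(void)
    {
            struct kexec_segment seg = {
                    .buf   = phy_mode_image,
                    .bufsz = sizeof(phy_mode_image),
                    .mem   = (void *)0x100000,  /* illustrative address */
                    .memsz = sizeof(phy_mode_image),
            };

            if (syscall(SYS_kexec_load, 0x100000UL, 1UL, &seg,
                        KEXEC_PRESERVE_CONTEXT | KEXEC_ARCH_DEFAULT)) {
                    perror("kexec_load");
                    return 1;
            }
            return 0;
    }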
-rw-r--r--	arch/powerpc/kernel/machine_kexec.c	2
-rw-r--r--	arch/sh/kernel/machine_kexec.c	2
-rw-r--r--	arch/x86/Kconfig	7
-rw-r--r--	arch/x86/kernel/machine_kexec_32.c	27
-rw-r--r--	arch/x86/kernel/machine_kexec_64.c	2
-rw-r--r--	arch/x86/kernel/relocate_kernel_32.S	174
-rw-r--r--	include/asm-x86/kexec.h	18
-rw-r--r--	include/linux/kexec.h	17
-rw-r--r--	kernel/kexec.c	57
-rw-r--r--	kernel/sys.c	31
10 files changed, 269 insertions, 68 deletions
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 29a0e039d436..aab76887a842 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -48,7 +48,7 @@ void machine_kexec_cleanup(struct kimage *image)
  * Do not allocate memory (or fail in any way) in machine_kexec().
  * We are past the point of no return, committed to rebooting now.
  */
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
 {
 	if (ppc_md.machine_kexec)
 		ppc_md.machine_kexec(image);
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index 5c17de51987e..ec1eadce4aaa 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -70,7 +70,7 @@ static void kexec_info(struct kimage *image)
  * Do not allocate memory (or fail in any way) in machine_kexec().
  * We are past the point of no return, committed to rebooting now.
  */
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
 {
 
 	unsigned long page_list;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e3cba0b45600..7ecb679f0130 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1279,6 +1279,13 @@ config CRASH_DUMP
 	  (CONFIG_RELOCATABLE=y).
 	  For more details see Documentation/kdump/kdump.txt
 
+config KEXEC_JUMP
+	bool "kexec jump (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	depends on KEXEC && PM_SLEEP && X86_32
+	help
+	  Invoke code in physical address mode via KEXEC
+
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
 	default "0x1000000" if X86_NUMAQ
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8864230d55af..2b67609d0a1c 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -22,6 +22,7 @@
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
 #include <asm/system.h>
+#include <asm/cacheflush.h>
 
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
 static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -85,10 +86,12 @@ static void load_segments(void)
  * reboot code buffer to allow us to avoid allocations
  * later.
  *
- * Currently nothing.
+ * Make control page executable.
  */
 int machine_kexec_prepare(struct kimage *image)
 {
+	if (nx_enabled)
+		set_pages_x(image->control_code_page, 1);
 	return 0;
 }
 
@@ -98,16 +101,24 @@ int machine_kexec_prepare(struct kimage *image)
  */
 void machine_kexec_cleanup(struct kimage *image)
 {
+	if (nx_enabled)
+		set_pages_nx(image->control_code_page, 1);
 }
 
 /*
  * Do not allocate memory (or fail in any way) in machine_kexec().
  * We are past the point of no return, committed to rebooting now.
  */
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
+	asmlinkage unsigned long
+		(*relocate_kernel_ptr)(unsigned long indirection_page,
+				       unsigned long control_page,
+				       unsigned long start_address,
+				       unsigned int has_pae,
+				       unsigned int preserve_context);
 
 	tracer_disable();
 
@@ -115,10 +126,11 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	local_irq_disable();
 
 	control_page = page_address(image->control_code_page);
-	memcpy(control_page, relocate_kernel, PAGE_SIZE);
+	memcpy(control_page, relocate_kernel, PAGE_SIZE/2);
 
+	relocate_kernel_ptr = control_page;
 	page_list[PA_CONTROL_PAGE] = __pa(control_page);
-	page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
+	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
 	page_list[PA_PGD] = __pa(kexec_pgd);
 	page_list[VA_PGD] = (unsigned long)kexec_pgd;
 #ifdef CONFIG_X86_PAE
@@ -131,6 +143,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
 	page_list[PA_PTE_1] = __pa(kexec_pte1);
 	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+	page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT);
 
 	/* The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
@@ -149,8 +162,10 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	set_idt(phys_to_virt(0),0);
 
 	/* now call it */
-	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-			image->start, cpu_has_pae);
+	image->start = relocate_kernel_ptr((unsigned long)image->head,
+					   (unsigned long)page_list,
+					   image->start, cpu_has_pae,
+					   image->preserve_context);
 }
 
 void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 9dd9262693a3..c43caa3a91f3 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image)
  * Do not allocate memory (or fail in any way) in machine_kexec().
  * We are past the point of no return, committed to rebooting now.
  */
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index c30fe25d470d..703310a99023 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -20,11 +20,44 @@
 #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define PAE_PGD_ATTR (_PAGE_PRESENT)
 
+/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are
+ * used to save some data for jumping back
+ */
+#define DATA(offset)		(PAGE_SIZE/2+(offset))
+
+/* Minimal CPU state */
+#define ESP			DATA(0x0)
+#define CR0			DATA(0x4)
+#define CR3			DATA(0x8)
+#define CR4			DATA(0xc)
+
+/* other data */
+#define CP_VA_CONTROL_PAGE	DATA(0x10)
+#define CP_PA_PGD		DATA(0x14)
+#define CP_PA_SWAP_PAGE		DATA(0x18)
+#define CP_PA_BACKUP_PAGES_MAP	DATA(0x1c)
+
 	.text
 	.align PAGE_SIZE
 	.globl relocate_kernel
 relocate_kernel:
-	movl	8(%esp), %ebp /* list of pages */
+	/* Save the CPU context, used for jumping back */
+
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebp
+	pushf
+
+	movl	20+8(%esp), %ebp /* list of pages */
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %edi
+	movl	%esp, ESP(%edi)
+	movl	%cr0, %eax
+	movl	%eax, CR0(%edi)
+	movl	%cr3, %eax
+	movl	%eax, CR3(%edi)
+	movl	%cr4, %eax
+	movl	%eax, CR4(%edi)
 
 #ifdef CONFIG_X86_PAE
 	/* map the control page at its virtual address */
@@ -138,15 +171,25 @@ relocate_kernel:
 
 relocate_new_kernel:
 	/* read the arguments and say goodbye to the stack */
-	movl	4(%esp), %ebx /* page_list */
-	movl	8(%esp), %ebp /* list of pages */
-	movl	12(%esp), %edx /* start address */
-	movl	16(%esp), %ecx /* cpu_has_pae */
+	movl	20+4(%esp), %ebx /* page_list */
+	movl	20+8(%esp), %ebp /* list of pages */
+	movl	20+12(%esp), %edx /* start address */
+	movl	20+16(%esp), %ecx /* cpu_has_pae */
+	movl	20+20(%esp), %esi /* preserve_context */
 
 	/* zero out flags, and disable interrupts */
 	pushl	$0
 	popfl
 
+	/* save some information for jumping back */
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %edi
+	movl	%edi, CP_VA_CONTROL_PAGE(%edi)
+	movl	PTR(PA_PGD)(%ebp), %eax
+	movl	%eax, CP_PA_PGD(%edi)
+	movl	PTR(PA_SWAP_PAGE)(%ebp), %eax
+	movl	%eax, CP_PA_SWAP_PAGE(%edi)
+	movl	%ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
+
 	/* get physical address of control page now */
 	/* this is impossible after page table switch */
 	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edi
@@ -197,8 +240,90 @@ identity_mapped:
 	xorl	%eax, %eax
 	movl	%eax, %cr3
 
+	movl	CP_PA_SWAP_PAGE(%edi), %eax
+	pushl	%eax
+	pushl	%ebx
+	call	swap_pages
+	addl	$8, %esp
+
+	/* To be certain of avoiding problems with self-modifying code
+	 * I need to execute a serializing instruction here.
+	 * So I flush the TLB, it's handy, and not processor dependent.
+	 */
+	xorl	%eax, %eax
+	movl	%eax, %cr3
+
+	/* set all of the registers to known values */
+	/* leave %esp alone */
+
+	testl	%esi, %esi
+	jnz 1f
+	xorl	%edi, %edi
+	xorl	%eax, %eax
+	xorl	%ebx, %ebx
+	xorl	%ecx, %ecx
+	xorl	%edx, %edx
+	xorl	%esi, %esi
+	xorl	%ebp, %ebp
+	ret
+1:
+	popl	%edx
+	movl	CP_PA_SWAP_PAGE(%edi), %esp
+	addl	$PAGE_SIZE, %esp
+2:
+	call	*%edx
+
+	/* get the re-entry point of the peer system */
+	movl	0(%esp), %ebp
+	call	1f
+1:
+	popl	%ebx
+	subl	$(1b - relocate_kernel), %ebx
+	movl	CP_VA_CONTROL_PAGE(%ebx), %edi
+	lea	PAGE_SIZE(%ebx), %esp
+	movl	CP_PA_SWAP_PAGE(%ebx), %eax
+	movl	CP_PA_BACKUP_PAGES_MAP(%ebx), %edx
+	pushl	%eax
+	pushl	%edx
+	call	swap_pages
+	addl	$8, %esp
+	movl	CP_PA_PGD(%ebx), %eax
+	movl	%eax, %cr3
+	movl	%cr0, %eax
+	orl	$(1<<31), %eax
+	movl	%eax, %cr0
+	lea	PAGE_SIZE(%edi), %esp
+	movl	%edi, %eax
+	addl	$(virtual_mapped - relocate_kernel), %eax
+	pushl	%eax
+	ret
+
+virtual_mapped:
+	movl	CR4(%edi), %eax
+	movl	%eax, %cr4
+	movl	CR3(%edi), %eax
+	movl	%eax, %cr3
+	movl	CR0(%edi), %eax
+	movl	%eax, %cr0
+	movl	ESP(%edi), %esp
+	movl	%ebp, %eax
+
+	popf
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	ret
+
 	/* Do the copies */
-	movl	%ebx, %ecx
+swap_pages:
+	movl	8(%esp), %edx
+	movl	4(%esp), %ecx
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	movl	%ecx, %ebx
 	jmp 1f
 
 0:	/* top, read another word from the indirection page */
@@ -226,27 +351,28 @@ identity_mapped:
 	movl	%ecx, %esi /* For every source page do a copy */
 	andl	$0xfffff000, %esi
 
+	movl	%edi, %eax
+	movl	%esi, %ebp
+
+	movl	%edx, %edi
 	movl	$1024, %ecx
 	rep ; movsl
-	jmp	0b
 
-3:
-
-	/* To be certain of avoiding problems with self-modifying code
-	 * I need to execute a serializing instruction here.
-	 * So I flush the TLB, it's handy, and not processor dependent.
-	 */
-	xorl	%eax, %eax
-	movl	%eax, %cr3
+	movl	%ebp, %edi
+	movl	%eax, %esi
+	movl	$1024, %ecx
+	rep ; movsl
 
-	/* set all of the registers to known values */
-	/* leave %esp alone */
+	movl	%eax, %edi
+	movl	%edx, %esi
+	movl	$1024, %ecx
+	rep ; movsl
 
-	xorl	%eax, %eax
-	xorl	%ebx, %ebx
-	xorl	%ecx, %ecx
-	xorl	%edx, %edx
-	xorl	%esi, %esi
-	xorl	%edi, %edi
-	xorl	%ebp, %ebp
+	lea	PAGE_SIZE(%ebp), %esi
+	jmp	0b
+3:
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
 	ret
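The saved-context area that the assembly above carves out of the second
half of the control page can be pictured as a C struct with the same
offsets; the struct name below is illustrative, only the offsets come
from the DATA() defines added in this file:

    #include <stdint.h>

    /* hypothetical view of control_page + PAGE_SIZE/2 */
    struct kexec_jump_context {
            uint32_t esp;                   /* DATA(0x0)  */
            uint32_t cr0;                   /* DATA(0x4)  */
            uint32_t cr3;                   /* DATA(0x8)  */
            uint32_t cr4;                   /* DATA(0xc)  */
            uint32_t va_control_page;       /* DATA(0x10) */
            uint32_t pa_pgd;                /* DATA(0x14) */
            uint32_t pa_swap_page;          /* DATA(0x18) */
            uint32_t pa_backup_pages_map;   /* DATA(0x1c) */
    };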
diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h
index 8f855a15f64d..c0e52a14fd4d 100644
--- a/include/asm-x86/kexec.h
+++ b/include/asm-x86/kexec.h
@@ -10,14 +10,15 @@
 # define VA_PTE_0	5
 # define PA_PTE_1	6
 # define VA_PTE_1	7
+# define PA_SWAP_PAGE	8
 # ifdef CONFIG_X86_PAE
-#  define PA_PMD_0	8
-#  define VA_PMD_0	9
-#  define PA_PMD_1	10
-#  define VA_PMD_1	11
-#  define PAGES_NR	12
+#  define PA_PMD_0	9
+#  define VA_PMD_0	10
+#  define PA_PMD_1	11
+#  define VA_PMD_1	12
+#  define PAGES_NR	13
 # else
-#  define PAGES_NR	8
+#  define PAGES_NR	9
 # endif
 #else
 # define PA_CONTROL_PAGE	0
@@ -152,11 +153,12 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
 }
 
 #ifdef CONFIG_X86_32
-asmlinkage NORET_TYPE void
+asmlinkage unsigned long
 relocate_kernel(unsigned long indirection_page,
 		unsigned long control_page,
 		unsigned long start_address,
-		unsigned int has_pae) ATTRIB_NORET;
+		unsigned int has_pae,
+		unsigned int preserve_context);
 #else
 NORET_TYPE void
 relocate_kernel(unsigned long indirection_page,
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 3265968cd2cd..82f88a8a827b 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -83,6 +83,7 @@ struct kimage {
 
 	unsigned long start;
 	struct page *control_code_page;
+	struct page *swap_page;
 
 	unsigned long nr_segments;
 	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
@@ -98,18 +99,20 @@ struct kimage {
 	unsigned int type : 1;
 #define KEXEC_TYPE_DEFAULT 0
 #define KEXEC_TYPE_CRASH   1
+	unsigned int preserve_context : 1;
 };
 
 
 
 /* kexec interface functions */
-extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
+extern void machine_kexec(struct kimage *image);
 extern int machine_kexec_prepare(struct kimage *image);
 extern void machine_kexec_cleanup(struct kimage *image);
 extern asmlinkage long sys_kexec_load(unsigned long entry,
 					unsigned long nr_segments,
 					struct kexec_segment __user *segments,
 					unsigned long flags);
+extern int kernel_kexec(void);
 #ifdef CONFIG_COMPAT
 extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
 					unsigned long nr_segments,
@@ -156,8 +159,9 @@ extern struct kimage *kexec_crash_image;
 #define kexec_flush_icache_page(page)
 #endif
 
-#define KEXEC_ON_CRASH  0x00000001
-#define KEXEC_ARCH_MASK 0xffff0000
+#define KEXEC_ON_CRASH		0x00000001
+#define KEXEC_PRESERVE_CONTEXT	0x00000002
+#define KEXEC_ARCH_MASK		0xffff0000
 
 /* These values match the ELF architecture values.
  * Unless there is a good reason that should continue to be the case.
@@ -174,7 +178,12 @@ extern struct kimage *kexec_crash_image;
 #define KEXEC_ARCH_MIPS_LE (10 << 16)
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
 
-#define KEXEC_FLAGS    (KEXEC_ON_CRASH)  /* List of defined/legal kexec flags */
+/* List of defined/legal kexec flags */
+#ifndef CONFIG_KEXEC_JUMP
+#define KEXEC_FLAGS    KEXEC_ON_CRASH
+#else
+#define KEXEC_FLAGS    (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
+#endif
 
 #define VMCOREINFO_BYTES           (4096)
 #define VMCOREINFO_NOTE_NAME       "VMCOREINFO"
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 6db42ff8d520..a0d920915b38 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -24,6 +24,8 @@
 #include <linux/utsrelease.h>
 #include <linux/utsname.h>
 #include <linux/numa.h>
+#include <linux/suspend.h>
+#include <linux/device.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -242,6 +244,12 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
 		goto out;
 	}
 
+	image->swap_page = kimage_alloc_control_pages(image, 0);
+	if (!image->swap_page) {
+		printk(KERN_ERR "Could not allocate swap buffer\n");
+		goto out;
+	}
+
 	result = 0;
  out:
 	if (result == 0)
@@ -986,6 +994,8 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
 		if (result)
 			goto out;
 
+		if (flags & KEXEC_PRESERVE_CONTEXT)
+			image->preserve_context = 1;
 		result = machine_kexec_prepare(image);
 		if (result)
 			goto out;
@@ -1411,3 +1421,50 @@ static int __init crash_save_vmcoreinfo_init(void)
 }
 
 module_init(crash_save_vmcoreinfo_init)
+
+/**
+ *	kernel_kexec - reboot the system
+ *
+ *	Move into place and start executing a preloaded standalone
+ *	executable.  If nothing was preloaded return an error.
+ */
+int kernel_kexec(void)
+{
+	int error = 0;
+
+	if (xchg(&kexec_lock, 1))
+		return -EBUSY;
+	if (!kexec_image) {
+		error = -EINVAL;
+		goto Unlock;
+	}
+
+	if (kexec_image->preserve_context) {
+#ifdef CONFIG_KEXEC_JUMP
+		local_irq_disable();
+		save_processor_state();
+#endif
+	} else {
+		blocking_notifier_call_chain(&reboot_notifier_list,
+					     SYS_RESTART, NULL);
+		system_state = SYSTEM_RESTART;
+		device_shutdown();
+		sysdev_shutdown();
+		printk(KERN_EMERG "Starting new kernel\n");
+		machine_shutdown();
+	}
+
+	machine_kexec(kexec_image);
+
+	if (kexec_image->preserve_context) {
+#ifdef CONFIG_KEXEC_JUMP
+		restore_processor_state();
+		local_irq_enable();
+#endif
+	}
+
+ Unlock:
+	xchg(&kexec_lock, 0);
+
+	return error;
+}
diff --git a/kernel/sys.c b/kernel/sys.c
index 0c9d3fa1f5ff..c01858090a98 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -301,26 +301,6 @@ void kernel_restart(char *cmd)
 }
 EXPORT_SYMBOL_GPL(kernel_restart);
 
-/**
- *	kernel_kexec - reboot the system
- *
- *	Move into place and start executing a preloaded standalone
- *	executable.  If nothing was preloaded return an error.
- */
-static void kernel_kexec(void)
-{
-#ifdef CONFIG_KEXEC
-	struct kimage *image;
-	image = xchg(&kexec_image, NULL);
-	if (!image)
-		return;
-	kernel_restart_prepare(NULL);
-	printk(KERN_EMERG "Starting new kernel\n");
-	machine_shutdown();
-	machine_kexec(image);
-#endif
-}
-
 static void kernel_shutdown_prepare(enum system_states state)
 {
 	blocking_notifier_call_chain(&reboot_notifier_list,
@@ -425,10 +405,15 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
 		kernel_restart(buffer);
 		break;
 
+#ifdef CONFIG_KEXEC
 	case LINUX_REBOOT_CMD_KEXEC:
-		kernel_kexec();
-		unlock_kernel();
-		return -EINVAL;
+	{
+		int ret;
+		ret = kernel_kexec();
+		unlock_kernel();
+		return ret;
+	}
+#endif
 
 #ifdef CONFIG_HIBERNATION
 	case LINUX_REBOOT_CMD_SW_SUSPEND:
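With the sys_reboot() change above, the jump itself is triggered from user
space through the reboot syscall; "/sbin/kexec -e" boils down to this. A
minimal sketch, assuming a context-preserving image was already loaded with
KEXEC_PRESERVE_CONTEXT; all identifiers are standard uapi names:

    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/reboot.h>

    int main(void)
    {
            /* returns here only if the kexeced code jumps back */
            if (syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
                        LINUX_REBOOT_CMD_KEXEC, NULL)) {
                    perror("reboot(LINUX_REBOOT_CMD_KEXEC)");
                    return 1;
            }
            return 0;
    }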