aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/relocate_kernel_32.S
diff options
context:
space:
mode:
author Huang Ying <ying.huang@intel.com> 2008-07-25 22:45:07 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-07-26 15:00:04 -0400
commit 3ab83521378268044a448113c6aa9a9e245f4d2f (patch)
tree e9e8496577e4b2e994edf204e9a8ae7c026eec95 /arch/x86/kernel/relocate_kernel_32.S
parent 7fccf0326536c1b245b98740d489abb9aab69a12 (diff)
kexec jump
This patch provides an enhancement to kexec/kdump. It implements the following features: - Backup/restore memory used by the original kernel before/after kexec. - Save/restore CPU state before/after kexec. The features of this patch can be used as a general method to call a program in physical mode (paging turned off). This can be used to call BIOS code under Linux. kexec-tools needs to be patched to support kexec jump. The patches and the precompiled kexec can be downloaded from the following URLs: source: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-src_git_kh10.tar.bz2 patches: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-patches_git_kh10.tar.bz2 binary: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec_git_kh10 Usage example of calling some physical mode code and returning: 1. Compile and install the patched kernel with the following options selected: CONFIG_X86_32=y CONFIG_KEXEC=y CONFIG_PM=y CONFIG_KEXEC_JUMP=y 2. Build the patched kexec-tools or download the pre-built one. 3. Build a physical mode executable, named for example "phy_mode" 4. Boot the kernel compiled in step 1. 5. Load the physical mode executable with /sbin/kexec. The shell command line can be as follows: /sbin/kexec --load-preserve-context --args-none phy_mode 6. Call the physical mode executable with the following shell command line: /sbin/kexec -e Implementation point: To support jumping without reserving memory, one shadow backup page (source page) is allocated for each page used by the kexeced code image (destination page). When kexec_load is performed, the image of the kexeced code is loaded into the source pages, and before executing, the destination pages and the source pages are swapped, so the contents of the destination pages are backed up. Before jumping to the kexeced code image and after jumping back to the original kernel, the destination pages and the source pages are swapped too. 
The C ABI (calling convention) is used as the communication protocol between the kernel and the called code. A flag named KEXEC_PRESERVE_CONTEXT for sys_kexec_load is added to indicate that the loaded kernel image is used for jumping back. Currently, only the i386 architecture is supported. Signed-off-by: Huang Ying <ying.huang@intel.com> Acked-by: Vivek Goyal <vgoyal@redhat.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: Nigel Cunningham <nigel@nigel.suspend2.net> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86/kernel/relocate_kernel_32.S')
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S174
1 files changed, 150 insertions, 24 deletions
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index c30fe25d470d..703310a99023 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -20,11 +20,44 @@
20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
21#define PAE_PGD_ATTR (_PAGE_PRESENT) 21#define PAE_PGD_ATTR (_PAGE_PRESENT)
22 22
23/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are
24 * used to save some data for jumping back
25 */
26#define DATA(offset) (PAGE_SIZE/2+(offset))
27
28/* Minimal CPU state */
29#define ESP DATA(0x0)
30#define CR0 DATA(0x4)
31#define CR3 DATA(0x8)
32#define CR4 DATA(0xc)
33
34/* other data */
35#define CP_VA_CONTROL_PAGE DATA(0x10)
36#define CP_PA_PGD DATA(0x14)
37#define CP_PA_SWAP_PAGE DATA(0x18)
38#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
39
23 .text 40 .text
24 .align PAGE_SIZE 41 .align PAGE_SIZE
25 .globl relocate_kernel 42 .globl relocate_kernel
26relocate_kernel: 43relocate_kernel:
27 movl 8(%esp), %ebp /* list of pages */ 44 /* Save the CPU context, used for jumping back */
45
46 pushl %ebx
47 pushl %esi
48 pushl %edi
49 pushl %ebp
50 pushf
51
52 movl 20+8(%esp), %ebp /* list of pages */
53 movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
54 movl %esp, ESP(%edi)
55 movl %cr0, %eax
56 movl %eax, CR0(%edi)
57 movl %cr3, %eax
58 movl %eax, CR3(%edi)
59 movl %cr4, %eax
60 movl %eax, CR4(%edi)
28 61
29#ifdef CONFIG_X86_PAE 62#ifdef CONFIG_X86_PAE
30 /* map the control page at its virtual address */ 63 /* map the control page at its virtual address */
@@ -138,15 +171,25 @@ relocate_kernel:
138 171
139relocate_new_kernel: 172relocate_new_kernel:
140 /* read the arguments and say goodbye to the stack */ 173 /* read the arguments and say goodbye to the stack */
141 movl 4(%esp), %ebx /* page_list */ 174 movl 20+4(%esp), %ebx /* page_list */
142 movl 8(%esp), %ebp /* list of pages */ 175 movl 20+8(%esp), %ebp /* list of pages */
143 movl 12(%esp), %edx /* start address */ 176 movl 20+12(%esp), %edx /* start address */
144 movl 16(%esp), %ecx /* cpu_has_pae */ 177 movl 20+16(%esp), %ecx /* cpu_has_pae */
178 movl 20+20(%esp), %esi /* preserve_context */
145 179
146 /* zero out flags, and disable interrupts */ 180 /* zero out flags, and disable interrupts */
147 pushl $0 181 pushl $0
148 popfl 182 popfl
149 183
184 /* save some information for jumping back */
185 movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
186 movl %edi, CP_VA_CONTROL_PAGE(%edi)
187 movl PTR(PA_PGD)(%ebp), %eax
188 movl %eax, CP_PA_PGD(%edi)
189 movl PTR(PA_SWAP_PAGE)(%ebp), %eax
190 movl %eax, CP_PA_SWAP_PAGE(%edi)
191 movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
192
150 /* get physical address of control page now */ 193 /* get physical address of control page now */
151 /* this is impossible after page table switch */ 194 /* this is impossible after page table switch */
152 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi 195 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
@@ -197,8 +240,90 @@ identity_mapped:
197 xorl %eax, %eax 240 xorl %eax, %eax
198 movl %eax, %cr3 241 movl %eax, %cr3
199 242
243 movl CP_PA_SWAP_PAGE(%edi), %eax
244 pushl %eax
245 pushl %ebx
246 call swap_pages
247 addl $8, %esp
248
249 /* To be certain of avoiding problems with self-modifying code
250 * I need to execute a serializing instruction here.
251 * So I flush the TLB, it's handy, and not processor dependent.
252 */
253 xorl %eax, %eax
254 movl %eax, %cr3
255
256 /* set all of the registers to known values */
257 /* leave %esp alone */
258
259 testl %esi, %esi
260 jnz 1f
261 xorl %edi, %edi
262 xorl %eax, %eax
263 xorl %ebx, %ebx
264 xorl %ecx, %ecx
265 xorl %edx, %edx
266 xorl %esi, %esi
267 xorl %ebp, %ebp
268 ret
2691:
270 popl %edx
271 movl CP_PA_SWAP_PAGE(%edi), %esp
272 addl $PAGE_SIZE, %esp
2732:
274 call *%edx
275
276 /* get the re-entry point of the peer system */
277 movl 0(%esp), %ebp
278 call 1f
2791:
280 popl %ebx
281 subl $(1b - relocate_kernel), %ebx
282 movl CP_VA_CONTROL_PAGE(%ebx), %edi
283 lea PAGE_SIZE(%ebx), %esp
284 movl CP_PA_SWAP_PAGE(%ebx), %eax
285 movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx
286 pushl %eax
287 pushl %edx
288 call swap_pages
289 addl $8, %esp
290 movl CP_PA_PGD(%ebx), %eax
291 movl %eax, %cr3
292 movl %cr0, %eax
293 orl $(1<<31), %eax
294 movl %eax, %cr0
295 lea PAGE_SIZE(%edi), %esp
296 movl %edi, %eax
297 addl $(virtual_mapped - relocate_kernel), %eax
298 pushl %eax
299 ret
300
301virtual_mapped:
302 movl CR4(%edi), %eax
303 movl %eax, %cr4
304 movl CR3(%edi), %eax
305 movl %eax, %cr3
306 movl CR0(%edi), %eax
307 movl %eax, %cr0
308 movl ESP(%edi), %esp
309 movl %ebp, %eax
310
311 popf
312 popl %ebp
313 popl %edi
314 popl %esi
315 popl %ebx
316 ret
317
200 /* Do the copies */ 318 /* Do the copies */
201 movl %ebx, %ecx 319swap_pages:
320 movl 8(%esp), %edx
321 movl 4(%esp), %ecx
322 pushl %ebp
323 pushl %ebx
324 pushl %edi
325 pushl %esi
326 movl %ecx, %ebx
202 jmp 1f 327 jmp 1f
203 328
2040: /* top, read another word from the indirection page */ 3290: /* top, read another word from the indirection page */
@@ -226,27 +351,28 @@ identity_mapped:
226 movl %ecx, %esi /* For every source page do a copy */ 351 movl %ecx, %esi /* For every source page do a copy */
227 andl $0xfffff000, %esi 352 andl $0xfffff000, %esi
228 353
354 movl %edi, %eax
355 movl %esi, %ebp
356
357 movl %edx, %edi
229 movl $1024, %ecx 358 movl $1024, %ecx
230 rep ; movsl 359 rep ; movsl
231 jmp 0b
232 360
2333: 361 movl %ebp, %edi
234 362 movl %eax, %esi
235 /* To be certain of avoiding problems with self-modifying code 363 movl $1024, %ecx
236 * I need to execute a serializing instruction here. 364 rep ; movsl
237 * So I flush the TLB, it's handy, and not processor dependent.
238 */
239 xorl %eax, %eax
240 movl %eax, %cr3
241 365
242 /* set all of the registers to known values */ 366 movl %eax, %edi
243 /* leave %esp alone */ 367 movl %edx, %esi
368 movl $1024, %ecx
369 rep ; movsl
244 370
245 xorl %eax, %eax 371 lea PAGE_SIZE(%ebp), %esi
246 xorl %ebx, %ebx 372 jmp 0b
247 xorl %ecx, %ecx 3733:
248 xorl %edx, %edx 374 popl %esi
249 xorl %esi, %esi 375 popl %edi
250 xorl %edi, %edi 376 popl %ebx
251 xorl %ebp, %ebp 377 popl %ebp
252 ret 378 ret