aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2005-06-25 17:57:56 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-25 19:24:49 -0400
commit5033cba087f6ac773002123aafbea1aad4267682 (patch)
treefa0301c28c004e81d3aad597f23ea2407db8396c /arch/i386/kernel
parentdd2a13054ffc25783a74afb5e4a0f2115e45f9cd (diff)
[PATCH] kexec: x86 kexec core
This is the i386 implementation of kexec. Signed-off-by: Eric Biederman <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/Makefile1
-rw-r--r--arch/i386/kernel/crash.c42
-rw-r--r--arch/i386/kernel/machine_kexec.c220
-rw-r--r--arch/i386/kernel/relocate_kernel.S120
-rw-r--r--arch/i386/kernel/syscall_table.S2
5 files changed, 384 insertions, 1 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 51ecd512603d..4cc83b322b36 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
24obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o 24obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
25obj-$(CONFIG_X86_IO_APIC) += io_apic.o 25obj-$(CONFIG_X86_IO_APIC) += io_apic.o
26obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o 26obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o
27obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
27obj-$(CONFIG_X86_NUMAQ) += numaq.o 28obj-$(CONFIG_X86_NUMAQ) += numaq.o
28obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o 29obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
29obj-$(CONFIG_KPROBES) += kprobes.o 30obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
new file mode 100644
index 000000000000..fa27a6c2abb6
--- /dev/null
+++ b/arch/i386/kernel/crash.c
@@ -0,0 +1,42 @@
1/*
2 * Architecture specific (i386) functions for kexec based crash dumps.
3 *
4 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
5 *
6 * Copyright (C) IBM Corporation, 2004. All rights reserved.
7 *
8 */
9
10#include <linux/init.h>
11#include <linux/types.h>
12#include <linux/kernel.h>
13#include <linux/smp.h>
14#include <linux/irq.h>
15#include <linux/reboot.h>
16#include <linux/kexec.h>
17#include <linux/irq.h>
18#include <linux/delay.h>
19#include <linux/elf.h>
20#include <linux/elfcore.h>
21
22#include <asm/processor.h>
23#include <asm/hardirq.h>
24#include <asm/nmi.h>
25#include <asm/hw_irq.h>
26
27#define MAX_NOTE_BYTES 1024
28typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
29
30note_buf_t crash_notes[NR_CPUS];
31
32void machine_crash_shutdown(void)
33{
34 /* This function is only called after the system
35 * has paniced or is otherwise in a critical state.
36 * The minimum amount of code to allow a kexec'd kernel
37 * to run successfully needs to happen here.
38 *
39 * In practice this means shooting down the other cpus in
40 * an SMP system.
41 */
42}
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
new file mode 100644
index 000000000000..671880415d1c
--- /dev/null
+++ b/arch/i386/kernel/machine_kexec.c
@@ -0,0 +1,220 @@
1/*
2 * machine_kexec.c - handle transition of Linux booting another kernel
3 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
4 *
5 * This source code is licensed under the GNU General Public License,
6 * Version 2. See the file COPYING for more details.
7 */
8
9#include <linux/mm.h>
10#include <linux/kexec.h>
11#include <linux/delay.h>
12#include <asm/pgtable.h>
13#include <asm/pgalloc.h>
14#include <asm/tlbflush.h>
15#include <asm/mmu_context.h>
16#include <asm/io.h>
17#include <asm/apic.h>
18#include <asm/cpufeature.h>
19
20static inline unsigned long read_cr3(void)
21{
22 unsigned long cr3;
23 asm volatile("movl %%cr3,%0": "=r"(cr3));
24 return cr3;
25}
26
27#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
28
29#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
30#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
31#define L2_ATTR (_PAGE_PRESENT)
32
33#define LEVEL0_SIZE (1UL << 12UL)
34
35#ifndef CONFIG_X86_PAE
36#define LEVEL1_SIZE (1UL << 22UL)
37static u32 pgtable_level1[1024] PAGE_ALIGNED;
38
39static void identity_map_page(unsigned long address)
40{
41 unsigned long level1_index, level2_index;
42 u32 *pgtable_level2;
43
44 /* Find the current page table */
45 pgtable_level2 = __va(read_cr3());
46
47 /* Find the indexes of the physical address to identity map */
48 level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
49 level2_index = address / LEVEL1_SIZE;
50
51 /* Identity map the page table entry */
52 pgtable_level1[level1_index] = address | L0_ATTR;
53 pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
54
55 /* Flush the tlb so the new mapping takes effect.
56 * Global tlb entries are not flushed but that is not an issue.
57 */
58 load_cr3(pgtable_level2);
59}
60
61#else
62#define LEVEL1_SIZE (1UL << 21UL)
63#define LEVEL2_SIZE (1UL << 30UL)
64static u64 pgtable_level1[512] PAGE_ALIGNED;
65static u64 pgtable_level2[512] PAGE_ALIGNED;
66
67static void identity_map_page(unsigned long address)
68{
69 unsigned long level1_index, level2_index, level3_index;
70 u64 *pgtable_level3;
71
72 /* Find the current page table */
73 pgtable_level3 = __va(read_cr3());
74
75 /* Find the indexes of the physical address to identity map */
76 level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
77 level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
78 level3_index = address / LEVEL2_SIZE;
79
80 /* Identity map the page table entry */
81 pgtable_level1[level1_index] = address | L0_ATTR;
82 pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
83 set_64bit(&pgtable_level3[level3_index], __pa(pgtable_level2) | L2_ATTR);
84
85 /* Flush the tlb so the new mapping takes effect.
86 * Global tlb entries are not flushed but that is not an issue.
87 */
88 load_cr3(pgtable_level3);
89}
90#endif
91
92
93static void set_idt(void *newidt, __u16 limit)
94{
95 unsigned char curidt[6];
96
97 /* ia32 supports unaliged loads & stores */
98 (*(__u16 *)(curidt)) = limit;
99 (*(__u32 *)(curidt +2)) = (unsigned long)(newidt);
100
101 __asm__ __volatile__ (
102 "lidt %0\n"
103 : "=m" (curidt)
104 );
105};
106
107
108static void set_gdt(void *newgdt, __u16 limit)
109{
110 unsigned char curgdt[6];
111
112 /* ia32 supports unaligned loads & stores */
113 (*(__u16 *)(curgdt)) = limit;
114 (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt);
115
116 __asm__ __volatile__ (
117 "lgdt %0\n"
118 : "=m" (curgdt)
119 );
120};
121
122static void load_segments(void)
123{
124#define __STR(X) #X
125#define STR(X) __STR(X)
126
127 __asm__ __volatile__ (
128 "\tljmp $"STR(__KERNEL_CS)",$1f\n"
129 "\t1:\n"
130 "\tmovl $"STR(__KERNEL_DS)",%eax\n"
131 "\tmovl %eax,%ds\n"
132 "\tmovl %eax,%es\n"
133 "\tmovl %eax,%fs\n"
134 "\tmovl %eax,%gs\n"
135 "\tmovl %eax,%ss\n"
136 );
137#undef STR
138#undef __STR
139}
140
141typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
142 unsigned long indirection_page, unsigned long reboot_code_buffer,
143 unsigned long start_address, unsigned int has_pae) ATTRIB_NORET;
144
145const extern unsigned char relocate_new_kernel[];
146extern void relocate_new_kernel_end(void);
147const extern unsigned int relocate_new_kernel_size;
148
149/*
150 * A architecture hook called to validate the
151 * proposed image and prepare the control pages
152 * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
153 * have been allocated, but the segments have yet
154 * been copied into the kernel.
155 *
156 * Do what every setup is needed on image and the
157 * reboot code buffer to allow us to avoid allocations
158 * later.
159 *
160 * Currently nothing.
161 */
162int machine_kexec_prepare(struct kimage *image)
163{
164 return 0;
165}
166
167/*
168 * Undo anything leftover by machine_kexec_prepare
169 * when an image is freed.
170 */
171void machine_kexec_cleanup(struct kimage *image)
172{
173}
174
175/*
176 * Do not allocate memory (or fail in any way) in machine_kexec().
177 * We are past the point of no return, committed to rebooting now.
178 */
179NORET_TYPE void machine_kexec(struct kimage *image)
180{
181 unsigned long page_list;
182 unsigned long reboot_code_buffer;
183 relocate_new_kernel_t rnk;
184
185 /* Interrupts aren't acceptable while we reboot */
186 local_irq_disable();
187
188 /* Compute some offsets */
189 reboot_code_buffer = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
190 page_list = image->head;
191
192 /* Set up an identity mapping for the reboot_code_buffer */
193 identity_map_page(reboot_code_buffer);
194
195 /* copy it out */
196 memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size);
197
198 /* The segment registers are funny things, they are
199 * automatically loaded from a table, in memory wherever you
200 * set them to a specific selector, but this table is never
201 * accessed again you set the segment to a different selector.
202 *
203 * The more common model is are caches where the behide
204 * the scenes work is done, but is also dropped at arbitrary
205 * times.
206 *
207 * I take advantage of this here by force loading the
208 * segments, before I zap the gdt with an invalid value.
209 */
210 load_segments();
211 /* The gdt & idt are now invalid.
212 * If you want to load them you must set up your own idt & gdt.
213 */
214 set_gdt(phys_to_virt(0),0);
215 set_idt(phys_to_virt(0),0);
216
217 /* now call it */
218 rnk = (relocate_new_kernel_t) reboot_code_buffer;
219 (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
220}
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..d312616effa1
--- /dev/null
+++ b/arch/i386/kernel/relocate_kernel.S
@@ -0,0 +1,120 @@
1/*
2 * relocate_kernel.S - put the kernel image in place to boot
3 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
4 *
5 * This source code is licensed under the GNU General Public License,
6 * Version 2. See the file COPYING for more details.
7 */
8
9#include <linux/linkage.h>
10
11 /*
12 * Must be relocatable PIC code callable as a C function, that once
13 * it starts can not use the previous processes stack.
14 */
15 .globl relocate_new_kernel
16relocate_new_kernel:
17 /* read the arguments and say goodbye to the stack */
18 movl 4(%esp), %ebx /* page_list */
19 movl 8(%esp), %ebp /* reboot_code_buffer */
20 movl 12(%esp), %edx /* start address */
21 movl 16(%esp), %ecx /* cpu_has_pae */
22
23 /* zero out flags, and disable interrupts */
24 pushl $0
25 popfl
26
27 /* set a new stack at the bottom of our page... */
28 lea 4096(%ebp), %esp
29
30 /* store the parameters back on the stack */
31 pushl %edx /* store the start address */
32
33 /* Set cr0 to a known state:
34 * 31 0 == Paging disabled
35 * 18 0 == Alignment check disabled
36 * 16 0 == Write protect disabled
37 * 3 0 == No task switch
38 * 2 0 == Don't do FP software emulation.
39 * 0 1 == Proctected mode enabled
40 */
41 movl %cr0, %eax
42 andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
43 orl $(1<<0), %eax
44 movl %eax, %cr0
45
46 /* clear cr4 if applicable */
47 testl %ecx, %ecx
48 jz 1f
49 /* Set cr4 to a known state:
50 * Setting everything to zero seems safe.
51 */
52 movl %cr4, %eax
53 andl $0, %eax
54 movl %eax, %cr4
55
56 jmp 1f
571:
58
59 /* Flush the TLB (needed?) */
60 xorl %eax, %eax
61 movl %eax, %cr3
62
63 /* Do the copies */
64 movl %ebx, %ecx
65 jmp 1f
66
670: /* top, read another word from the indirection page */
68 movl (%ebx), %ecx
69 addl $4, %ebx
701:
71 testl $0x1, %ecx /* is it a destination page */
72 jz 2f
73 movl %ecx, %edi
74 andl $0xfffff000, %edi
75 jmp 0b
762:
77 testl $0x2, %ecx /* is it an indirection page */
78 jz 2f
79 movl %ecx, %ebx
80 andl $0xfffff000, %ebx
81 jmp 0b
822:
83 testl $0x4, %ecx /* is it the done indicator */
84 jz 2f
85 jmp 3f
862:
87 testl $0x8, %ecx /* is it the source indicator */
88 jz 0b /* Ignore it otherwise */
89 movl %ecx, %esi /* For every source page do a copy */
90 andl $0xfffff000, %esi
91
92 movl $1024, %ecx
93 rep ; movsl
94 jmp 0b
95
963:
97
98 /* To be certain of avoiding problems with self-modifying code
99 * I need to execute a serializing instruction here.
100 * So I flush the TLB, it's handy, and not processor dependent.
101 */
102 xorl %eax, %eax
103 movl %eax, %cr3
104
105 /* set all of the registers to known values */
106 /* leave %esp alone */
107
108 xorl %eax, %eax
109 xorl %ebx, %ebx
110 xorl %ecx, %ecx
111 xorl %edx, %edx
112 xorl %esi, %esi
113 xorl %edi, %edi
114 xorl %ebp, %ebp
115 ret
116relocate_new_kernel_end:
117
118 .globl relocate_new_kernel_size
119relocate_new_kernel_size:
120 .long relocate_new_kernel_end - relocate_new_kernel
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index d408afaf6495..442a6e937b19 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -283,7 +283,7 @@ ENTRY(sys_call_table)
283 .long sys_mq_timedreceive /* 280 */ 283 .long sys_mq_timedreceive /* 280 */
284 .long sys_mq_notify 284 .long sys_mq_notify
285 .long sys_mq_getsetattr 285 .long sys_mq_getsetattr
286 .long sys_ni_syscall /* reserved for kexec */ 286 .long sys_kexec_load
287 .long sys_waitid 287 .long sys_waitid
288 .long sys_ni_syscall /* 285 */ /* available */ 288 .long sys_ni_syscall /* 285 */ /* available */
289 .long sys_add_key 289 .long sys_add_key