diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2005-06-25 17:57:56 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-25 19:24:49 -0400 |
commit | 5033cba087f6ac773002123aafbea1aad4267682 (patch) | |
tree | fa0301c28c004e81d3aad597f23ea2407db8396c /arch/i386/kernel/machine_kexec.c | |
parent | dd2a13054ffc25783a74afb5e4a0f2115e45f9cd (diff) |
[PATCH] kexec: x86 kexec core
This is the i386 implementation of kexec.
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/i386/kernel/machine_kexec.c')
-rw-r--r-- | arch/i386/kernel/machine_kexec.c | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c new file mode 100644 index 000000000000..671880415d1c --- /dev/null +++ b/arch/i386/kernel/machine_kexec.c | |||
@@ -0,0 +1,220 @@ | |||
1 | /* | ||
2 | * machine_kexec.c - handle transition of Linux booting another kernel | ||
3 | * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> | ||
4 | * | ||
5 | * This source code is licensed under the GNU General Public License, | ||
6 | * Version 2. See the file COPYING for more details. | ||
7 | */ | ||
8 | |||
9 | #include <linux/mm.h> | ||
10 | #include <linux/kexec.h> | ||
11 | #include <linux/delay.h> | ||
12 | #include <asm/pgtable.h> | ||
13 | #include <asm/pgalloc.h> | ||
14 | #include <asm/tlbflush.h> | ||
15 | #include <asm/mmu_context.h> | ||
16 | #include <asm/io.h> | ||
17 | #include <asm/apic.h> | ||
18 | #include <asm/cpufeature.h> | ||
19 | |||
20 | static inline unsigned long read_cr3(void) | ||
21 | { | ||
22 | unsigned long cr3; | ||
23 | asm volatile("movl %%cr3,%0": "=r"(cr3)); | ||
24 | return cr3; | ||
25 | } | ||
26 | |||
/* Align an object on a PAGE_SIZE boundary; used for the static page tables. */
#define PAGE_ALIGNED	__attribute__ ((__aligned__(PAGE_SIZE)))

/*
 * Attribute bits OR-ed into the entries written by identity_map_page():
 *   L0_ATTR - entry that maps the page itself
 *   L1_ATTR - entry that points at the level-1 table
 *   L2_ATTR - entry that points at the level-2 table (PAE build only)
 */
#define L0_ATTR	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L1_ATTR	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L2_ATTR	(_PAGE_PRESENT)

/* Bytes covered by one level-0 entry, i.e. a single 4K page. */
#define LEVEL0_SIZE	(1UL << 12)
34 | |||
35 | #ifndef CONFIG_X86_PAE | ||
36 | #define LEVEL1_SIZE (1UL << 22UL) | ||
37 | static u32 pgtable_level1[1024] PAGE_ALIGNED; | ||
38 | |||
39 | static void identity_map_page(unsigned long address) | ||
40 | { | ||
41 | unsigned long level1_index, level2_index; | ||
42 | u32 *pgtable_level2; | ||
43 | |||
44 | /* Find the current page table */ | ||
45 | pgtable_level2 = __va(read_cr3()); | ||
46 | |||
47 | /* Find the indexes of the physical address to identity map */ | ||
48 | level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; | ||
49 | level2_index = address / LEVEL1_SIZE; | ||
50 | |||
51 | /* Identity map the page table entry */ | ||
52 | pgtable_level1[level1_index] = address | L0_ATTR; | ||
53 | pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; | ||
54 | |||
55 | /* Flush the tlb so the new mapping takes effect. | ||
56 | * Global tlb entries are not flushed but that is not an issue. | ||
57 | */ | ||
58 | load_cr3(pgtable_level2); | ||
59 | } | ||
60 | |||
61 | #else | ||
62 | #define LEVEL1_SIZE (1UL << 21UL) | ||
63 | #define LEVEL2_SIZE (1UL << 30UL) | ||
64 | static u64 pgtable_level1[512] PAGE_ALIGNED; | ||
65 | static u64 pgtable_level2[512] PAGE_ALIGNED; | ||
66 | |||
67 | static void identity_map_page(unsigned long address) | ||
68 | { | ||
69 | unsigned long level1_index, level2_index, level3_index; | ||
70 | u64 *pgtable_level3; | ||
71 | |||
72 | /* Find the current page table */ | ||
73 | pgtable_level3 = __va(read_cr3()); | ||
74 | |||
75 | /* Find the indexes of the physical address to identity map */ | ||
76 | level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; | ||
77 | level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE; | ||
78 | level3_index = address / LEVEL2_SIZE; | ||
79 | |||
80 | /* Identity map the page table entry */ | ||
81 | pgtable_level1[level1_index] = address | L0_ATTR; | ||
82 | pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; | ||
83 | set_64bit(&pgtable_level3[level3_index], __pa(pgtable_level2) | L2_ATTR); | ||
84 | |||
85 | /* Flush the tlb so the new mapping takes effect. | ||
86 | * Global tlb entries are not flushed but that is not an issue. | ||
87 | */ | ||
88 | load_cr3(pgtable_level3); | ||
89 | } | ||
90 | #endif | ||
91 | |||
92 | |||
93 | static void set_idt(void *newidt, __u16 limit) | ||
94 | { | ||
95 | unsigned char curidt[6]; | ||
96 | |||
97 | /* ia32 supports unaliged loads & stores */ | ||
98 | (*(__u16 *)(curidt)) = limit; | ||
99 | (*(__u32 *)(curidt +2)) = (unsigned long)(newidt); | ||
100 | |||
101 | __asm__ __volatile__ ( | ||
102 | "lidt %0\n" | ||
103 | : "=m" (curidt) | ||
104 | ); | ||
105 | }; | ||
106 | |||
107 | |||
108 | static void set_gdt(void *newgdt, __u16 limit) | ||
109 | { | ||
110 | unsigned char curgdt[6]; | ||
111 | |||
112 | /* ia32 supports unaligned loads & stores */ | ||
113 | (*(__u16 *)(curgdt)) = limit; | ||
114 | (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt); | ||
115 | |||
116 | __asm__ __volatile__ ( | ||
117 | "lgdt %0\n" | ||
118 | : "=m" (curgdt) | ||
119 | ); | ||
120 | }; | ||
121 | |||
/*
 * Reload every segment register from the current GDT: %cs through a far
 * jump to __KERNEL_CS, and %ds, %es, %fs, %gs and %ss with __KERNEL_DS.
 */
static void load_segments(void)
{
#define __STR(X) #X
#define STR(X) __STR(X)

	/*
	 * The sequence overwrites %eax and changes the segments used for
	 * every later memory access, so both are declared as clobbers;
	 * without them the compiler may keep a live value in %eax or move
	 * memory accesses across the reload.  Having a clobber list makes
	 * this extended asm, hence the doubled '%' on register names.
	 */
	__asm__ __volatile__ (
		"\tljmp $"STR(__KERNEL_CS)",$1f\n"
		"\t1:\n"
		"\tmovl $"STR(__KERNEL_DS)",%%eax\n"
		"\tmovl %%eax,%%ds\n"
		"\tmovl %%eax,%%es\n"
		"\tmovl %%eax,%%fs\n"
		"\tmovl %%eax,%%gs\n"
		"\tmovl %%eax,%%ss\n"
		: /* no outputs */
		: /* no inputs */
		: "eax", "memory"
	);
#undef STR
#undef __STR
}
140 | |||
141 | typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( | ||
142 | unsigned long indirection_page, unsigned long reboot_code_buffer, | ||
143 | unsigned long start_address, unsigned int has_pae) ATTRIB_NORET; | ||
144 | |||
145 | const extern unsigned char relocate_new_kernel[]; | ||
146 | extern void relocate_new_kernel_end(void); | ||
147 | const extern unsigned int relocate_new_kernel_size; | ||
148 | |||
/*
 * Architecture hook called to validate the proposed image and to
 * prepare the control pages as needed.  The pages for
 * KEXEC_CONTROL_CODE_SIZE have been allocated, but the segments
 * have not yet been copied into the kernel.
 *
 * Do whatever setup is needed on the image and the reboot code
 * buffer here, so that no allocations are required later.
 *
 * Nothing is needed on this architecture at present, so this
 * always returns 0.
 */
int machine_kexec_prepare(struct kimage *image)
{
	return 0;
}
166 | |||
/*
 * Architecture hook called when an image is freed, to undo anything
 * machine_kexec_prepare() left behind.  There is nothing to undo here.
 */
void machine_kexec_cleanup(struct kimage *image)
{
}
174 | |||
175 | /* | ||
176 | * Do not allocate memory (or fail in any way) in machine_kexec(). | ||
177 | * We are past the point of no return, committed to rebooting now. | ||
178 | */ | ||
179 | NORET_TYPE void machine_kexec(struct kimage *image) | ||
180 | { | ||
181 | unsigned long page_list; | ||
182 | unsigned long reboot_code_buffer; | ||
183 | relocate_new_kernel_t rnk; | ||
184 | |||
185 | /* Interrupts aren't acceptable while we reboot */ | ||
186 | local_irq_disable(); | ||
187 | |||
188 | /* Compute some offsets */ | ||
189 | reboot_code_buffer = page_to_pfn(image->control_code_page) << PAGE_SHIFT; | ||
190 | page_list = image->head; | ||
191 | |||
192 | /* Set up an identity mapping for the reboot_code_buffer */ | ||
193 | identity_map_page(reboot_code_buffer); | ||
194 | |||
195 | /* copy it out */ | ||
196 | memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); | ||
197 | |||
198 | /* The segment registers are funny things, they are | ||
199 | * automatically loaded from a table, in memory wherever you | ||
200 | * set them to a specific selector, but this table is never | ||
201 | * accessed again you set the segment to a different selector. | ||
202 | * | ||
203 | * The more common model is are caches where the behide | ||
204 | * the scenes work is done, but is also dropped at arbitrary | ||
205 | * times. | ||
206 | * | ||
207 | * I take advantage of this here by force loading the | ||
208 | * segments, before I zap the gdt with an invalid value. | ||
209 | */ | ||
210 | load_segments(); | ||
211 | /* The gdt & idt are now invalid. | ||
212 | * If you want to load them you must set up your own idt & gdt. | ||
213 | */ | ||
214 | set_gdt(phys_to_virt(0),0); | ||
215 | set_idt(phys_to_virt(0),0); | ||
216 | |||
217 | /* now call it */ | ||
218 | rnk = (relocate_new_kernel_t) reboot_code_buffer; | ||
219 | (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); | ||
220 | } | ||