author    | Eric W. Biederman <ebiederm@xmission.com> | 2005-06-25 17:58:02 -0400
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-25 19:24:50 -0400
commit    | 5234f5eb04abbbfa306ccfbc2ccbb6e73f515b15 (patch)
tree      | 60f4701fdc501955ccff198f84913b96e3bbf5bf /arch/x86_64/kernel/machine_kexec.c
parent    | d89559589a588d1a654329d8cd9a3ad33aaad9be (diff)
[PATCH] kexec: x86_64 kexec implementation
This is the x86_64 implementation of machine kexec. 32-bit compatibility
support has been implemented, and machine_kexec has been enhanced so that it
does not care about the changing internal kernel page table structures.
From: Alexander Nyberg <alexn@dsv.su.se>
build fix
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
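As background for the patch below: machine_kexec_prepare() builds a throwaway identity-mapped page table (virtual address == physical address) out of the image's control pages, using 2 MiB PSE entries at the PMD level, so the relocation trampoline can keep paging enabled while it copies the new kernel into place. The short user-space sketch below is illustrative only and is not part of the patch; the constants mirror LEVEL1_SIZE and L1_ATTR from the new file, and fill_identity_pmd is a hypothetical stand-in for init_level2_page.

/* Illustrative user-space model only -- not kernel code.  Each PMD entry
 * maps a 2 MiB region to the identical physical address, with the same
 * attribute bits as L1_ATTR in the patch (present|rw|accessed|dirty|PSE). */
#include <stdint.h>
#include <stdio.h>

#define PMD_ENTRY_SIZE   (1UL << 21)  /* 2 MiB per entry, as LEVEL1_SIZE */
#define ENTRIES_PER_PMD  512          /* 512 * 2 MiB = 1 GiB per table   */
#define IDENT_ATTR       0xe3UL       /* present|rw|accessed|dirty|PSE   */

/* Hypothetical stand-in for init_level2_page(): identity-map the 1 GiB
 * region starting at addr. */
static void fill_identity_pmd(uint64_t *pmd, uint64_t addr)
{
        for (int i = 0; i < ENTRIES_PER_PMD; i++, addr += PMD_ENTRY_SIZE)
                pmd[i] = addr | IDENT_ATTR;
}

int main(void)
{
        static uint64_t pmd[ENTRIES_PER_PMD];

        fill_identity_pmd(pmd, 0);
        printf("entry 0 -> %#llx, entry 511 -> %#llx\n",
               (unsigned long long)pmd[0], (unsigned long long)pmd[511]);
        return 0;
}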
Diffstat (limited to 'arch/x86_64/kernel/machine_kexec.c')
-rw-r--r-- | arch/x86_64/kernel/machine_kexec.c | 245
1 file changed, 245 insertions, 0 deletions
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
new file mode 100644
index 000000000000..200b5993f8d9
--- /dev/null
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -0,0 +1,245 @@
/*
 * machine_kexec.c - handle transition of Linux booting another kernel
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/reboot.h>
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/hw_irq.h>

#define LEVEL0_SIZE (1UL << 12UL)
#define LEVEL1_SIZE (1UL << 21UL)
#define LEVEL2_SIZE (1UL << 30UL)
#define LEVEL3_SIZE (1UL << 39UL)
#define LEVEL4_SIZE (1UL << 48UL)

#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE)
#define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

static void init_level2_page(
        u64 *level2p, unsigned long addr)
{
        unsigned long end_addr;
        addr &= PAGE_MASK;
        end_addr = addr + LEVEL2_SIZE;
        while(addr < end_addr) {
                *(level2p++) = addr | L1_ATTR;
                addr += LEVEL1_SIZE;
        }
}

static int init_level3_page(struct kimage *image,
        u64 *level3p, unsigned long addr, unsigned long last_addr)
{
        unsigned long end_addr;
        int result;
        result = 0;
        addr &= PAGE_MASK;
        end_addr = addr + LEVEL3_SIZE;
        while((addr < last_addr) && (addr < end_addr)) {
                struct page *page;
                u64 *level2p;
                page = kimage_alloc_control_pages(image, 0);
                if (!page) {
                        result = -ENOMEM;
                        goto out;
                }
                level2p = (u64 *)page_address(page);
                init_level2_page(level2p, addr);
                *(level3p++) = __pa(level2p) | L2_ATTR;
                addr += LEVEL2_SIZE;
        }
        /* clear the unused entries */
        while(addr < end_addr) {
                *(level3p++) = 0;
                addr += LEVEL2_SIZE;
        }
out:
        return result;
}


static int init_level4_page(struct kimage *image,
        u64 *level4p, unsigned long addr, unsigned long last_addr)
{
        unsigned long end_addr;
        int result;
        result = 0;
        addr &= PAGE_MASK;
        end_addr = addr + LEVEL4_SIZE;
        while((addr < last_addr) && (addr < end_addr)) {
                struct page *page;
                u64 *level3p;
                page = kimage_alloc_control_pages(image, 0);
                if (!page) {
                        result = -ENOMEM;
                        goto out;
                }
                level3p = (u64 *)page_address(page);
                result = init_level3_page(image, level3p, addr, last_addr);
                if (result) {
                        goto out;
                }
                *(level4p++) = __pa(level3p) | L3_ATTR;
                addr += LEVEL3_SIZE;
        }
        /* clear the unused entries */
        while(addr < end_addr) {
                *(level4p++) = 0;
                addr += LEVEL3_SIZE;
        }
out:
        return result;
}


static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
        u64 *level4p;
        level4p = (u64 *)__va(start_pgtable);
        return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
}

static void set_idt(void *newidt, u16 limit)
{
        unsigned char curidt[10];

        /* x86-64 supports unaligned loads & stores */
        (*(u16 *)(curidt)) = limit;
        (*(u64 *)(curidt +2)) = (unsigned long)(newidt);

        __asm__ __volatile__ (
                "lidt %0\n"
                : "=m" (curidt)
                );
};


static void set_gdt(void *newgdt, u16 limit)
{
        unsigned char curgdt[10];

        /* x86-64 supports unaligned loads & stores */
        (*(u16 *)(curgdt)) = limit;
        (*(u64 *)(curgdt +2)) = (unsigned long)(newgdt);

        __asm__ __volatile__ (
                "lgdt %0\n"
                : "=m" (curgdt)
                );
};

static void load_segments(void)
{
        __asm__ __volatile__ (
                "\tmovl $"STR(__KERNEL_DS)",%eax\n"
                "\tmovl %eax,%ds\n"
                "\tmovl %eax,%es\n"
                "\tmovl %eax,%ss\n"
                "\tmovl %eax,%fs\n"
                "\tmovl %eax,%gs\n"
                );
#undef STR
#undef __STR
}

typedef NORET_TYPE void (*relocate_new_kernel_t)(
        unsigned long indirection_page, unsigned long control_code_buffer,
        unsigned long start_address, unsigned long pgtable) ATTRIB_NORET;

const extern unsigned char relocate_new_kernel[];
const extern unsigned long relocate_new_kernel_size;

int machine_kexec_prepare(struct kimage *image)
{
        unsigned long start_pgtable, control_code_buffer;
        int result;

        /* Calculate the offsets */
        start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
        control_code_buffer = start_pgtable + 4096UL;

        /* Setup the identity mapped 64bit page table */
        result = init_pgtable(image, start_pgtable);
        if (result) {
                return result;
        }

        /* Place the code in the reboot code buffer */
        memcpy(__va(control_code_buffer), relocate_new_kernel, relocate_new_kernel_size);

        return 0;
}

void machine_kexec_cleanup(struct kimage *image)
{
        return;
}

/*
 * Do not allocate memory (or fail in any way) in machine_kexec().
 * We are past the point of no return, committed to rebooting now.
 */
NORET_TYPE void machine_kexec(struct kimage *image)
{
        unsigned long page_list;
        unsigned long control_code_buffer;
        unsigned long start_pgtable;
        relocate_new_kernel_t rnk;

        /* Interrupts aren't acceptable while we reboot */
        local_irq_disable();

        /* Calculate the offsets */
        page_list = image->head;
        start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
        control_code_buffer = start_pgtable + 4096UL;

        /* Set the low half of the page table to my identity mapped
         * page table for kexec.  Leave the high half pointing at the
         * kernel pages.  Don't bother to flush the global pages
         * as that will happen when I fully switch to my identity mapped
         * page table anyway.
         */
        memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2);
        __flush_tlb();

        /* The segment registers are funny things: they are
         * automatically loaded from a table in memory whenever you
         * set them to a specific selector, but that table is never
         * accessed again unless you set the segment to a different selector.
         *
         * The more common model is a cache where the behind-the-scenes
         * work is done, but the cached state is also dropped at
         * arbitrary times.
         *
         * I take advantage of this here by force loading the
         * segments, before I zap the gdt with an invalid value.
         */
        load_segments();
        /* The gdt & idt are now invalid.
         * If you want to load them you must set up your own idt & gdt.
         */
        set_gdt(phys_to_virt(0),0);
        set_idt(phys_to_virt(0),0);
        /* now call it */
        rnk = (relocate_new_kernel_t) control_code_buffer;
        (*rnk)(page_list, control_code_buffer, image->start, start_pgtable);
}
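A side note on set_gdt()/set_idt() above: the 10-byte curgdt/curidt buffers hand-pack the pseudo-descriptor that the long-mode lgdt/lidt instructions expect, i.e. a 16-bit limit immediately followed by a 64-bit linear base with no padding. The sketch below (illustrative only, using a hypothetical struct name, not anything from the patch) shows the equivalent packed-struct layout and checks that the two encodings produce the same bytes.

/* Illustrative user-space check only -- not part of the patch. */
#include <stdint.h>
#include <string.h>
#include <assert.h>

/* Hypothetical packed-struct spelling of the lgdt/lidt operand:
 * 2-byte limit followed by an 8-byte linear base, 10 bytes total. */
struct pseudo_descriptor {
        uint16_t limit;
        uint64_t base;
} __attribute__((packed));

int main(void)
{
        unsigned char buf[10];
        struct pseudo_descriptor desc = {
                .limit = 0x7f,                  /* arbitrary example values */
                .base  = 0xffffffff81000000ULL,
        };

        /* The patch stores the two fields straight into a byte buffer,
         * relying on x86-64 tolerating the unaligned 8-byte store ... */
        *(uint16_t *)buf = 0x7f;
        *(uint64_t *)(buf + 2) = 0xffffffff81000000ULL;

        /* ... which yields exactly the same 10 bytes as the packed struct. */
        assert(sizeof(desc) == sizeof(buf));
        assert(memcmp(&desc, buf, sizeof(buf)) == 0);
        return 0;
}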