diff options
author | Magnus Damm <magnus@valinux.co.jp> | 2006-09-26 04:52:38 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2006-09-26 04:52:38 -0400 |
commit | 4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f (patch) | |
tree | 652effe9d02c80d93357512233f4ee962320ac3b /arch/x86_64/kernel/machine_kexec.c | |
parent | f574164491d00d28b727d713685fb5edc9138200 (diff) |
[PATCH] Avoid overwriting the current pgd (V4, x86_64)
kexec: Avoid overwriting the current pgd (V4, x86_64)
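This patch upgrades the x86_64-specific kexec code to avoid overwriting the
current pgd. Overwriting the current pgd is a problem when CONFIG_CRASH_DUMP is
used to start a secondary kernel that dumps the memory of the previous kernel:
the old code copied its identity mapping into the low half of the live pgd, so
the dump no longer showed the page tables the previous kernel was using.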
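The code introduces a new set of statically allocated, page-aligned page tables
(kexec_pgd, kexec_pud0/1, kexec_pmd0/1 and kexec_pte0/1). Their physical and
virtual addresses are passed to relocate_kernel() through page_list, and they
are used to provide an executable identity mapping without overwriting the
current pgd.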
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
Diffstat (limited to 'arch/x86_64/kernel/machine_kexec.c')
-rw-r--r-- | arch/x86_64/kernel/machine_kexec.c | 71 |
1 files changed, 37 insertions, 34 deletions
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 2e94c072d84a..0497e3bd5bff 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c | |||
@@ -15,6 +15,15 @@ | |||
15 | #include <asm/mmu_context.h> | 15 | #include <asm/mmu_context.h> |
16 | #include <asm/io.h> | 16 | #include <asm/io.h> |
17 | 17 | ||
18 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | ||
19 | static u64 kexec_pgd[512] PAGE_ALIGNED; | ||
20 | static u64 kexec_pud0[512] PAGE_ALIGNED; | ||
21 | static u64 kexec_pmd0[512] PAGE_ALIGNED; | ||
22 | static u64 kexec_pte0[512] PAGE_ALIGNED; | ||
23 | static u64 kexec_pud1[512] PAGE_ALIGNED; | ||
24 | static u64 kexec_pmd1[512] PAGE_ALIGNED; | ||
25 | static u64 kexec_pte1[512] PAGE_ALIGNED; | ||
26 | |||
18 | static void init_level2_page(pmd_t *level2p, unsigned long addr) | 27 | static void init_level2_page(pmd_t *level2p, unsigned long addr) |
19 | { | 28 | { |
20 | unsigned long end_addr; | 29 | unsigned long end_addr; |
@@ -144,32 +153,19 @@ static void load_segments(void) | |||
144 | ); | 153 | ); |
145 | } | 154 | } |
146 | 155 | ||
147 | typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page, | ||
148 | unsigned long control_code_buffer, | ||
149 | unsigned long start_address, | ||
150 | unsigned long pgtable) ATTRIB_NORET; | ||
151 | |||
152 | extern const unsigned char relocate_new_kernel[]; | ||
153 | extern const unsigned long relocate_new_kernel_size; | ||
154 | |||
155 | int machine_kexec_prepare(struct kimage *image) | 156 | int machine_kexec_prepare(struct kimage *image) |
156 | { | 157 | { |
157 | unsigned long start_pgtable, control_code_buffer; | 158 | unsigned long start_pgtable; |
158 | int result; | 159 | int result; |
159 | 160 | ||
160 | /* Calculate the offsets */ | 161 | /* Calculate the offsets */ |
161 | start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; | 162 | start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; |
162 | control_code_buffer = start_pgtable + PAGE_SIZE; | ||
163 | 163 | ||
164 | /* Setup the identity mapped 64bit page table */ | 164 | /* Setup the identity mapped 64bit page table */ |
165 | result = init_pgtable(image, start_pgtable); | 165 | result = init_pgtable(image, start_pgtable); |
166 | if (result) | 166 | if (result) |
167 | return result; | 167 | return result; |
168 | 168 | ||
169 | /* Place the code in the reboot code buffer */ | ||
170 | memcpy(__va(control_code_buffer), relocate_new_kernel, | ||
171 | relocate_new_kernel_size); | ||
172 | |||
173 | return 0; | 169 | return 0; |
174 | } | 170 | } |
175 | 171 | ||
@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage *image) | |||
184 | */ | 180 | */ |
185 | NORET_TYPE void machine_kexec(struct kimage *image) | 181 | NORET_TYPE void machine_kexec(struct kimage *image) |
186 | { | 182 | { |
187 | unsigned long page_list; | 183 | unsigned long page_list[PAGES_NR]; |
188 | unsigned long control_code_buffer; | 184 | void *control_page; |
189 | unsigned long start_pgtable; | ||
190 | relocate_new_kernel_t rnk; | ||
191 | 185 | ||
192 | /* Interrupts aren't acceptable while we reboot */ | 186 | /* Interrupts aren't acceptable while we reboot */ |
193 | local_irq_disable(); | 187 | local_irq_disable(); |
194 | 188 | ||
195 | /* Calculate the offsets */ | 189 | control_page = page_address(image->control_code_page) + PAGE_SIZE; |
196 | page_list = image->head; | 190 | memcpy(control_page, relocate_kernel, PAGE_SIZE); |
197 | start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; | 191 | |
198 | control_code_buffer = start_pgtable + PAGE_SIZE; | 192 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
199 | 193 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | |
200 | /* Set the low half of the page table to my identity mapped | 194 | page_list[PA_PGD] = __pa(kexec_pgd); |
201 | * page table for kexec. Leave the high half pointing at the | 195 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
202 | * kernel pages. Don't bother to flush the global pages | 196 | page_list[PA_PUD_0] = __pa(kexec_pud0); |
203 | * as that will happen when I fully switch to my identity mapped | 197 | page_list[VA_PUD_0] = (unsigned long)kexec_pud0; |
204 | * page table anyway. | 198 | page_list[PA_PMD_0] = __pa(kexec_pmd0); |
205 | */ | 199 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; |
206 | memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2); | 200 | page_list[PA_PTE_0] = __pa(kexec_pte0); |
207 | __flush_tlb(); | 201 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
208 | 202 | page_list[PA_PUD_1] = __pa(kexec_pud1); | |
203 | page_list[VA_PUD_1] = (unsigned long)kexec_pud1; | ||
204 | page_list[PA_PMD_1] = __pa(kexec_pmd1); | ||
205 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | ||
206 | page_list[PA_PTE_1] = __pa(kexec_pte1); | ||
207 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | ||
208 | |||
209 | page_list[PA_TABLE_PAGE] = | ||
210 | (unsigned long)__pa(page_address(image->control_code_page)); | ||
209 | 211 | ||
210 | /* The segment registers are funny things, they have both a | 212 | /* The segment registers are funny things, they have both a |
211 | * visible and an invisible part. Whenever the visible part is | 213 | * visible and an invisible part. Whenever the visible part is |
@@ -222,9 +224,10 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
222 | */ | 224 | */ |
223 | set_gdt(phys_to_virt(0),0); | 225 | set_gdt(phys_to_virt(0),0); |
224 | set_idt(phys_to_virt(0),0); | 226 | set_idt(phys_to_virt(0),0); |
227 | |||
225 | /* now call it */ | 228 | /* now call it */ |
226 | rnk = (relocate_new_kernel_t) control_code_buffer; | 229 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, |
227 | (*rnk)(page_list, control_code_buffer, image->start, start_pgtable); | 230 | image->start); |
228 | } | 231 | } |
229 | 232 | ||
230 | /* crashkernel=size@addr specifies the location to reserve for | 233 | /* crashkernel=size@addr specifies the location to reserve for |