aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMagnus Damm <magnus@valinux.co.jp>2006-09-26 04:52:38 -0400
committerAndi Kleen <andi@basil.nowhere.org>2006-09-26 04:52:38 -0400
commit4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f (patch)
tree652effe9d02c80d93357512233f4ee962320ac3b
parentf574164491d00d28b727d713685fb5edc9138200 (diff)
[PATCH] Avoid overwriting the current pgd (V4, x86_64)
kexec: Avoid overwriting the current pgd (V4, x86_64) This patch upgrades the x86_64-specific kexec code to avoid overwriting the current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used to start a secondary kernel that dumps the memory of the previous kernel. The code introduces a new set of page tables. These tables are used to provide an executable identity mapping without overwriting the current pgd. Signed-off-by: Magnus Damm <magnus@valinux.co.jp> Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r--arch/x86_64/kernel/machine_kexec.c71
-rw-r--r--arch/x86_64/kernel/relocate_kernel.S171
-rw-r--r--include/asm-x86_64/kexec.h29
3 files changed, 218 insertions, 53 deletions
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 2e94c072d84a..0497e3bd5bff 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -15,6 +15,15 @@
15#include <asm/mmu_context.h> 15#include <asm/mmu_context.h>
16#include <asm/io.h> 16#include <asm/io.h>
17 17
18#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
19static u64 kexec_pgd[512] PAGE_ALIGNED;
20static u64 kexec_pud0[512] PAGE_ALIGNED;
21static u64 kexec_pmd0[512] PAGE_ALIGNED;
22static u64 kexec_pte0[512] PAGE_ALIGNED;
23static u64 kexec_pud1[512] PAGE_ALIGNED;
24static u64 kexec_pmd1[512] PAGE_ALIGNED;
25static u64 kexec_pte1[512] PAGE_ALIGNED;
26
18static void init_level2_page(pmd_t *level2p, unsigned long addr) 27static void init_level2_page(pmd_t *level2p, unsigned long addr)
19{ 28{
20 unsigned long end_addr; 29 unsigned long end_addr;
@@ -144,32 +153,19 @@ static void load_segments(void)
144 ); 153 );
145} 154}
146 155
147typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
148 unsigned long control_code_buffer,
149 unsigned long start_address,
150 unsigned long pgtable) ATTRIB_NORET;
151
152extern const unsigned char relocate_new_kernel[];
153extern const unsigned long relocate_new_kernel_size;
154
155int machine_kexec_prepare(struct kimage *image) 156int machine_kexec_prepare(struct kimage *image)
156{ 157{
157 unsigned long start_pgtable, control_code_buffer; 158 unsigned long start_pgtable;
158 int result; 159 int result;
159 160
160 /* Calculate the offsets */ 161 /* Calculate the offsets */
161 start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; 162 start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
162 control_code_buffer = start_pgtable + PAGE_SIZE;
163 163
164 /* Setup the identity mapped 64bit page table */ 164 /* Setup the identity mapped 64bit page table */
165 result = init_pgtable(image, start_pgtable); 165 result = init_pgtable(image, start_pgtable);
166 if (result) 166 if (result)
167 return result; 167 return result;
168 168
169 /* Place the code in the reboot code buffer */
170 memcpy(__va(control_code_buffer), relocate_new_kernel,
171 relocate_new_kernel_size);
172
173 return 0; 169 return 0;
174} 170}
175 171
@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage *image)
184 */ 180 */
185NORET_TYPE void machine_kexec(struct kimage *image) 181NORET_TYPE void machine_kexec(struct kimage *image)
186{ 182{
187 unsigned long page_list; 183 unsigned long page_list[PAGES_NR];
188 unsigned long control_code_buffer; 184 void *control_page;
189 unsigned long start_pgtable;
190 relocate_new_kernel_t rnk;
191 185
192 /* Interrupts aren't acceptable while we reboot */ 186 /* Interrupts aren't acceptable while we reboot */
193 local_irq_disable(); 187 local_irq_disable();
194 188
195 /* Calculate the offsets */ 189 control_page = page_address(image->control_code_page) + PAGE_SIZE;
196 page_list = image->head; 190 memcpy(control_page, relocate_kernel, PAGE_SIZE);
197 start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; 191
198 control_code_buffer = start_pgtable + PAGE_SIZE; 192 page_list[PA_CONTROL_PAGE] = __pa(control_page);
199 193 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
200 /* Set the low half of the page table to my identity mapped 194 page_list[PA_PGD] = __pa(kexec_pgd);
201 * page table for kexec. Leave the high half pointing at the 195 page_list[VA_PGD] = (unsigned long)kexec_pgd;
202 * kernel pages. Don't bother to flush the global pages 196 page_list[PA_PUD_0] = __pa(kexec_pud0);
203 * as that will happen when I fully switch to my identity mapped 197 page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
204 * page table anyway. 198 page_list[PA_PMD_0] = __pa(kexec_pmd0);
205 */ 199 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
206 memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2); 200 page_list[PA_PTE_0] = __pa(kexec_pte0);
207 __flush_tlb(); 201 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
208 202 page_list[PA_PUD_1] = __pa(kexec_pud1);
203 page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
204 page_list[PA_PMD_1] = __pa(kexec_pmd1);
205 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
206 page_list[PA_PTE_1] = __pa(kexec_pte1);
207 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
208
209 page_list[PA_TABLE_PAGE] =
210 (unsigned long)__pa(page_address(image->control_code_page));
209 211
210 /* The segment registers are funny things, they have both a 212 /* The segment registers are funny things, they have both a
211 * visible and an invisible part. Whenever the visible part is 213 * visible and an invisible part. Whenever the visible part is
@@ -222,9 +224,10 @@ NORET_TYPE void machine_kexec(struct kimage *image)
222 */ 224 */
223 set_gdt(phys_to_virt(0),0); 225 set_gdt(phys_to_virt(0),0);
224 set_idt(phys_to_virt(0),0); 226 set_idt(phys_to_virt(0),0);
227
225 /* now call it */ 228 /* now call it */
226 rnk = (relocate_new_kernel_t) control_code_buffer; 229 relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
227 (*rnk)(page_list, control_code_buffer, image->start, start_pgtable); 230 image->start);
228} 231}
229 232
230/* crashkernel=size@addr specifies the location to reserve for 233/* crashkernel=size@addr specifies the location to reserve for
diff --git a/arch/x86_64/kernel/relocate_kernel.S b/arch/x86_64/kernel/relocate_kernel.S
index d24fa9b72a2b..14e95872c6a3 100644
--- a/arch/x86_64/kernel/relocate_kernel.S
+++ b/arch/x86_64/kernel/relocate_kernel.S
@@ -7,31 +7,169 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/page.h>
11#include <asm/kexec.h>
10 12
11 /* 13/*
12 * Must be relocatable PIC code callable as a C function, that once 14 * Must be relocatable PIC code callable as a C function
13 * it starts can not use the previous processes stack. 15 */
14 */ 16
15 .globl relocate_new_kernel 17#define PTR(x) (x << 3)
18#define PAGE_ALIGNED (1 << PAGE_SHIFT)
19#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
20
21 .text
22 .align PAGE_ALIGNED
16 .code64 23 .code64
24 .globl relocate_kernel
25relocate_kernel:
26 /* %rdi indirection_page
27 * %rsi page_list
28 * %rdx start address
29 */
30
31 /* map the control page at its virtual address */
32
33 movq $0x0000ff8000000000, %r10 /* mask */
34 mov $(39 - 3), %cl /* bits to shift */
35 movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
36
37 movq %r11, %r9
38 andq %r10, %r9
39 shrq %cl, %r9
40
41 movq PTR(VA_PGD)(%rsi), %r8
42 addq %r8, %r9
43 movq PTR(PA_PUD_0)(%rsi), %r8
44 orq $PAGE_ATTR, %r8
45 movq %r8, (%r9)
46
47 shrq $9, %r10
48 sub $9, %cl
49
50 movq %r11, %r9
51 andq %r10, %r9
52 shrq %cl, %r9
53
54 movq PTR(VA_PUD_0)(%rsi), %r8
55 addq %r8, %r9
56 movq PTR(PA_PMD_0)(%rsi), %r8
57 orq $PAGE_ATTR, %r8
58 movq %r8, (%r9)
59
60 shrq $9, %r10
61 sub $9, %cl
62
63 movq %r11, %r9
64 andq %r10, %r9
65 shrq %cl, %r9
66
67 movq PTR(VA_PMD_0)(%rsi), %r8
68 addq %r8, %r9
69 movq PTR(PA_PTE_0)(%rsi), %r8
70 orq $PAGE_ATTR, %r8
71 movq %r8, (%r9)
72
73 shrq $9, %r10
74 sub $9, %cl
75
76 movq %r11, %r9
77 andq %r10, %r9
78 shrq %cl, %r9
79
80 movq PTR(VA_PTE_0)(%rsi), %r8
81 addq %r8, %r9
82 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
83 orq $PAGE_ATTR, %r8
84 movq %r8, (%r9)
85
86 /* identity map the control page at its physical address */
87
88 movq $0x0000ff8000000000, %r10 /* mask */
89 mov $(39 - 3), %cl /* bits to shift */
90 movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
91
92 movq %r11, %r9
93 andq %r10, %r9
94 shrq %cl, %r9
95
96 movq PTR(VA_PGD)(%rsi), %r8
97 addq %r8, %r9
98 movq PTR(PA_PUD_1)(%rsi), %r8
99 orq $PAGE_ATTR, %r8
100 movq %r8, (%r9)
101
102 shrq $9, %r10
103 sub $9, %cl
104
105 movq %r11, %r9
106 andq %r10, %r9
107 shrq %cl, %r9
108
109 movq PTR(VA_PUD_1)(%rsi), %r8
110 addq %r8, %r9
111 movq PTR(PA_PMD_1)(%rsi), %r8
112 orq $PAGE_ATTR, %r8
113 movq %r8, (%r9)
114
115 shrq $9, %r10
116 sub $9, %cl
117
118 movq %r11, %r9
119 andq %r10, %r9
120 shrq %cl, %r9
121
122 movq PTR(VA_PMD_1)(%rsi), %r8
123 addq %r8, %r9
124 movq PTR(PA_PTE_1)(%rsi), %r8
125 orq $PAGE_ATTR, %r8
126 movq %r8, (%r9)
127
128 shrq $9, %r10
129 sub $9, %cl
130
131 movq %r11, %r9
132 andq %r10, %r9
133 shrq %cl, %r9
134
135 movq PTR(VA_PTE_1)(%rsi), %r8
136 addq %r8, %r9
137 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
138 orq $PAGE_ATTR, %r8
139 movq %r8, (%r9)
140
17relocate_new_kernel: 141relocate_new_kernel:
18 /* %rdi page_list 142 /* %rdi indirection_page
19 * %rsi reboot_code_buffer 143 * %rsi page_list
20 * %rdx start address 144 * %rdx start address
21 * %rcx page_table
22 * %r8 arg5
23 * %r9 arg6
24 */ 145 */
25 146
26 /* zero out flags, and disable interrupts */ 147 /* zero out flags, and disable interrupts */
27 pushq $0 148 pushq $0
28 popfq 149 popfq
29 150
30 /* set a new stack at the bottom of our page... */ 151 /* get physical address of control page now */
31 lea 4096(%rsi), %rsp 152 /* this is impossible after page table switch */
153 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
154
155 /* get physical address of page table now too */
156 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
32 157
33 /* store the parameters back on the stack */ 158 /* switch to new set of page tables */
34 pushq %rdx /* store the start address */ 159 movq PTR(PA_PGD)(%rsi), %r9
160 movq %r9, %cr3
161
162 /* setup a new stack at the end of the physical control page */
163 lea 4096(%r8), %rsp
164
165 /* jump to identity mapped page */
166 addq $(identity_mapped - relocate_kernel), %r8
167 pushq %r8
168 ret
169
170identity_mapped:
171 /* store the start address on the stack */
172 pushq %rdx
35 173
36 /* Set cr0 to a known state: 174 /* Set cr0 to a known state:
37 * 31 1 == Paging enabled 175 * 31 1 == Paging enabled
@@ -136,8 +274,3 @@ relocate_new_kernel:
136 xorq %r15, %r15 274 xorq %r15, %r15
137 275
138 ret 276 ret
139relocate_new_kernel_end:
140
141 .globl relocate_new_kernel_size
142relocate_new_kernel_size:
143 .quad relocate_new_kernel_end - relocate_new_kernel
diff --git a/include/asm-x86_64/kexec.h b/include/asm-x86_64/kexec.h
index c564bae03433..5fab957e1091 100644
--- a/include/asm-x86_64/kexec.h
+++ b/include/asm-x86_64/kexec.h
@@ -1,6 +1,27 @@
1#ifndef _X86_64_KEXEC_H 1#ifndef _X86_64_KEXEC_H
2#define _X86_64_KEXEC_H 2#define _X86_64_KEXEC_H
3 3
4#define PA_CONTROL_PAGE 0
5#define VA_CONTROL_PAGE 1
6#define PA_PGD 2
7#define VA_PGD 3
8#define PA_PUD_0 4
9#define VA_PUD_0 5
10#define PA_PMD_0 6
11#define VA_PMD_0 7
12#define PA_PTE_0 8
13#define VA_PTE_0 9
14#define PA_PUD_1 10
15#define VA_PUD_1 11
16#define PA_PMD_1 12
17#define VA_PMD_1 13
18#define PA_PTE_1 14
19#define VA_PTE_1 15
20#define PA_TABLE_PAGE 16
21#define PAGES_NR 17
22
23#ifndef __ASSEMBLY__
24
4#include <linux/string.h> 25#include <linux/string.h>
5 26
6#include <asm/page.h> 27#include <asm/page.h>
@@ -64,4 +85,12 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
64 newregs->rip = (unsigned long)current_text_addr(); 85 newregs->rip = (unsigned long)current_text_addr();
65 } 86 }
66} 87}
88
89NORET_TYPE void
90relocate_kernel(unsigned long indirection_page,
91 unsigned long page_list,
92 unsigned long start_address) ATTRIB_NORET;
93
94#endif /* __ASSEMBLY__ */
95
67#endif /* _X86_64_KEXEC_H */ 96#endif /* _X86_64_KEXEC_H */