diff options
author | Magnus Damm <magnus@valinux.co.jp> | 2006-09-26 04:52:38 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2006-09-26 04:52:38 -0400 |
commit | 4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f (patch) | |
tree | 652effe9d02c80d93357512233f4ee962320ac3b | |
parent | f574164491d00d28b727d713685fb5edc9138200 (diff) |
[PATCH] Avoid overwriting the current pgd (V4, x86_64)
kexec: Avoid overwriting the current pgd (V4, x86_64)
This patch upgrades the x86_64-specific kexec code to avoid overwriting the
current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used
to start a secondary kernel that dumps the memory of the previous kernel.
The code introduces a new set of page tables. These tables are used to provide
an executable identity mapping without overwriting the current pgd.
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r-- | arch/x86_64/kernel/machine_kexec.c | 71 | ||||
-rw-r--r-- | arch/x86_64/kernel/relocate_kernel.S | 171 | ||||
-rw-r--r-- | include/asm-x86_64/kexec.h | 29 |
3 files changed, 218 insertions, 53 deletions
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 2e94c072d84a..0497e3bd5bff 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c | |||
@@ -15,6 +15,15 @@ | |||
15 | #include <asm/mmu_context.h> | 15 | #include <asm/mmu_context.h> |
16 | #include <asm/io.h> | 16 | #include <asm/io.h> |
17 | 17 | ||
18 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | ||
19 | static u64 kexec_pgd[512] PAGE_ALIGNED; | ||
20 | static u64 kexec_pud0[512] PAGE_ALIGNED; | ||
21 | static u64 kexec_pmd0[512] PAGE_ALIGNED; | ||
22 | static u64 kexec_pte0[512] PAGE_ALIGNED; | ||
23 | static u64 kexec_pud1[512] PAGE_ALIGNED; | ||
24 | static u64 kexec_pmd1[512] PAGE_ALIGNED; | ||
25 | static u64 kexec_pte1[512] PAGE_ALIGNED; | ||
26 | |||
18 | static void init_level2_page(pmd_t *level2p, unsigned long addr) | 27 | static void init_level2_page(pmd_t *level2p, unsigned long addr) |
19 | { | 28 | { |
20 | unsigned long end_addr; | 29 | unsigned long end_addr; |
@@ -144,32 +153,19 @@ static void load_segments(void) | |||
144 | ); | 153 | ); |
145 | } | 154 | } |
146 | 155 | ||
147 | typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page, | ||
148 | unsigned long control_code_buffer, | ||
149 | unsigned long start_address, | ||
150 | unsigned long pgtable) ATTRIB_NORET; | ||
151 | |||
152 | extern const unsigned char relocate_new_kernel[]; | ||
153 | extern const unsigned long relocate_new_kernel_size; | ||
154 | |||
155 | int machine_kexec_prepare(struct kimage *image) | 156 | int machine_kexec_prepare(struct kimage *image) |
156 | { | 157 | { |
157 | unsigned long start_pgtable, control_code_buffer; | 158 | unsigned long start_pgtable; |
158 | int result; | 159 | int result; |
159 | 160 | ||
160 | /* Calculate the offsets */ | 161 | /* Calculate the offsets */ |
161 | start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; | 162 | start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; |
162 | control_code_buffer = start_pgtable + PAGE_SIZE; | ||
163 | 163 | ||
164 | /* Setup the identity mapped 64bit page table */ | 164 | /* Setup the identity mapped 64bit page table */ |
165 | result = init_pgtable(image, start_pgtable); | 165 | result = init_pgtable(image, start_pgtable); |
166 | if (result) | 166 | if (result) |
167 | return result; | 167 | return result; |
168 | 168 | ||
169 | /* Place the code in the reboot code buffer */ | ||
170 | memcpy(__va(control_code_buffer), relocate_new_kernel, | ||
171 | relocate_new_kernel_size); | ||
172 | |||
173 | return 0; | 169 | return 0; |
174 | } | 170 | } |
175 | 171 | ||
@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage *image) | |||
184 | */ | 180 | */ |
185 | NORET_TYPE void machine_kexec(struct kimage *image) | 181 | NORET_TYPE void machine_kexec(struct kimage *image) |
186 | { | 182 | { |
187 | unsigned long page_list; | 183 | unsigned long page_list[PAGES_NR]; |
188 | unsigned long control_code_buffer; | 184 | void *control_page; |
189 | unsigned long start_pgtable; | ||
190 | relocate_new_kernel_t rnk; | ||
191 | 185 | ||
192 | /* Interrupts aren't acceptable while we reboot */ | 186 | /* Interrupts aren't acceptable while we reboot */ |
193 | local_irq_disable(); | 187 | local_irq_disable(); |
194 | 188 | ||
195 | /* Calculate the offsets */ | 189 | control_page = page_address(image->control_code_page) + PAGE_SIZE; |
196 | page_list = image->head; | 190 | memcpy(control_page, relocate_kernel, PAGE_SIZE); |
197 | start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; | 191 | |
198 | control_code_buffer = start_pgtable + PAGE_SIZE; | 192 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
199 | 193 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | |
200 | /* Set the low half of the page table to my identity mapped | 194 | page_list[PA_PGD] = __pa(kexec_pgd); |
201 | * page table for kexec. Leave the high half pointing at the | 195 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
202 | * kernel pages. Don't bother to flush the global pages | 196 | page_list[PA_PUD_0] = __pa(kexec_pud0); |
203 | * as that will happen when I fully switch to my identity mapped | 197 | page_list[VA_PUD_0] = (unsigned long)kexec_pud0; |
204 | * page table anyway. | 198 | page_list[PA_PMD_0] = __pa(kexec_pmd0); |
205 | */ | 199 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; |
206 | memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2); | 200 | page_list[PA_PTE_0] = __pa(kexec_pte0); |
207 | __flush_tlb(); | 201 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
208 | 202 | page_list[PA_PUD_1] = __pa(kexec_pud1); | |
203 | page_list[VA_PUD_1] = (unsigned long)kexec_pud1; | ||
204 | page_list[PA_PMD_1] = __pa(kexec_pmd1); | ||
205 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | ||
206 | page_list[PA_PTE_1] = __pa(kexec_pte1); | ||
207 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | ||
208 | |||
209 | page_list[PA_TABLE_PAGE] = | ||
210 | (unsigned long)__pa(page_address(image->control_code_page)); | ||
209 | 211 | ||
210 | /* The segment registers are funny things, they have both a | 212 | /* The segment registers are funny things, they have both a |
211 | * visible and an invisible part. Whenever the visible part is | 213 | * visible and an invisible part. Whenever the visible part is |
@@ -222,9 +224,10 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
222 | */ | 224 | */ |
223 | set_gdt(phys_to_virt(0),0); | 225 | set_gdt(phys_to_virt(0),0); |
224 | set_idt(phys_to_virt(0),0); | 226 | set_idt(phys_to_virt(0),0); |
227 | |||
225 | /* now call it */ | 228 | /* now call it */ |
226 | rnk = (relocate_new_kernel_t) control_code_buffer; | 229 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, |
227 | (*rnk)(page_list, control_code_buffer, image->start, start_pgtable); | 230 | image->start); |
228 | } | 231 | } |
229 | 232 | ||
230 | /* crashkernel=size@addr specifies the location to reserve for | 233 | /* crashkernel=size@addr specifies the location to reserve for |
diff --git a/arch/x86_64/kernel/relocate_kernel.S b/arch/x86_64/kernel/relocate_kernel.S index d24fa9b72a2b..14e95872c6a3 100644 --- a/arch/x86_64/kernel/relocate_kernel.S +++ b/arch/x86_64/kernel/relocate_kernel.S | |||
@@ -7,31 +7,169 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | #include <asm/page.h> | ||
11 | #include <asm/kexec.h> | ||
10 | 12 | ||
11 | /* | 13 | /* |
12 | * Must be relocatable PIC code callable as a C function, that once | 14 | * Must be relocatable PIC code callable as a C function |
13 | * it starts can not use the previous processes stack. | 15 | */ |
14 | */ | 16 | |
15 | .globl relocate_new_kernel | 17 | #define PTR(x) (x << 3) |
18 | #define PAGE_ALIGNED (1 << PAGE_SHIFT) | ||
19 | #define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ | ||
20 | |||
21 | .text | ||
22 | .align PAGE_ALIGNED | ||
16 | .code64 | 23 | .code64 |
24 | .globl relocate_kernel | ||
25 | relocate_kernel: | ||
26 | /* %rdi indirection_page | ||
27 | * %rsi page_list | ||
28 | * %rdx start address | ||
29 | */ | ||
30 | |||
31 | /* map the control page at its virtual address */ | ||
32 | |||
33 | movq $0x0000ff8000000000, %r10 /* mask */ | ||
34 | mov $(39 - 3), %cl /* bits to shift */ | ||
35 | movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ | ||
36 | |||
37 | movq %r11, %r9 | ||
38 | andq %r10, %r9 | ||
39 | shrq %cl, %r9 | ||
40 | |||
41 | movq PTR(VA_PGD)(%rsi), %r8 | ||
42 | addq %r8, %r9 | ||
43 | movq PTR(PA_PUD_0)(%rsi), %r8 | ||
44 | orq $PAGE_ATTR, %r8 | ||
45 | movq %r8, (%r9) | ||
46 | |||
47 | shrq $9, %r10 | ||
48 | sub $9, %cl | ||
49 | |||
50 | movq %r11, %r9 | ||
51 | andq %r10, %r9 | ||
52 | shrq %cl, %r9 | ||
53 | |||
54 | movq PTR(VA_PUD_0)(%rsi), %r8 | ||
55 | addq %r8, %r9 | ||
56 | movq PTR(PA_PMD_0)(%rsi), %r8 | ||
57 | orq $PAGE_ATTR, %r8 | ||
58 | movq %r8, (%r9) | ||
59 | |||
60 | shrq $9, %r10 | ||
61 | sub $9, %cl | ||
62 | |||
63 | movq %r11, %r9 | ||
64 | andq %r10, %r9 | ||
65 | shrq %cl, %r9 | ||
66 | |||
67 | movq PTR(VA_PMD_0)(%rsi), %r8 | ||
68 | addq %r8, %r9 | ||
69 | movq PTR(PA_PTE_0)(%rsi), %r8 | ||
70 | orq $PAGE_ATTR, %r8 | ||
71 | movq %r8, (%r9) | ||
72 | |||
73 | shrq $9, %r10 | ||
74 | sub $9, %cl | ||
75 | |||
76 | movq %r11, %r9 | ||
77 | andq %r10, %r9 | ||
78 | shrq %cl, %r9 | ||
79 | |||
80 | movq PTR(VA_PTE_0)(%rsi), %r8 | ||
81 | addq %r8, %r9 | ||
82 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 | ||
83 | orq $PAGE_ATTR, %r8 | ||
84 | movq %r8, (%r9) | ||
85 | |||
86 | /* identity map the control page at its physical address */ | ||
87 | |||
88 | movq $0x0000ff8000000000, %r10 /* mask */ | ||
89 | mov $(39 - 3), %cl /* bits to shift */ | ||
90 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ | ||
91 | |||
92 | movq %r11, %r9 | ||
93 | andq %r10, %r9 | ||
94 | shrq %cl, %r9 | ||
95 | |||
96 | movq PTR(VA_PGD)(%rsi), %r8 | ||
97 | addq %r8, %r9 | ||
98 | movq PTR(PA_PUD_1)(%rsi), %r8 | ||
99 | orq $PAGE_ATTR, %r8 | ||
100 | movq %r8, (%r9) | ||
101 | |||
102 | shrq $9, %r10 | ||
103 | sub $9, %cl | ||
104 | |||
105 | movq %r11, %r9 | ||
106 | andq %r10, %r9 | ||
107 | shrq %cl, %r9 | ||
108 | |||
109 | movq PTR(VA_PUD_1)(%rsi), %r8 | ||
110 | addq %r8, %r9 | ||
111 | movq PTR(PA_PMD_1)(%rsi), %r8 | ||
112 | orq $PAGE_ATTR, %r8 | ||
113 | movq %r8, (%r9) | ||
114 | |||
115 | shrq $9, %r10 | ||
116 | sub $9, %cl | ||
117 | |||
118 | movq %r11, %r9 | ||
119 | andq %r10, %r9 | ||
120 | shrq %cl, %r9 | ||
121 | |||
122 | movq PTR(VA_PMD_1)(%rsi), %r8 | ||
123 | addq %r8, %r9 | ||
124 | movq PTR(PA_PTE_1)(%rsi), %r8 | ||
125 | orq $PAGE_ATTR, %r8 | ||
126 | movq %r8, (%r9) | ||
127 | |||
128 | shrq $9, %r10 | ||
129 | sub $9, %cl | ||
130 | |||
131 | movq %r11, %r9 | ||
132 | andq %r10, %r9 | ||
133 | shrq %cl, %r9 | ||
134 | |||
135 | movq PTR(VA_PTE_1)(%rsi), %r8 | ||
136 | addq %r8, %r9 | ||
137 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 | ||
138 | orq $PAGE_ATTR, %r8 | ||
139 | movq %r8, (%r9) | ||
140 | |||
17 | relocate_new_kernel: | 141 | relocate_new_kernel: |
18 | /* %rdi page_list | 142 | /* %rdi indirection_page |
19 | * %rsi reboot_code_buffer | 143 | * %rsi page_list |
20 | * %rdx start address | 144 | * %rdx start address |
21 | * %rcx page_table | ||
22 | * %r8 arg5 | ||
23 | * %r9 arg6 | ||
24 | */ | 145 | */ |
25 | 146 | ||
26 | /* zero out flags, and disable interrupts */ | 147 | /* zero out flags, and disable interrupts */ |
27 | pushq $0 | 148 | pushq $0 |
28 | popfq | 149 | popfq |
29 | 150 | ||
30 | /* set a new stack at the bottom of our page... */ | 151 | /* get physical address of control page now */ |
31 | lea 4096(%rsi), %rsp | 152 | /* this is impossible after page table switch */ |
153 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 | ||
154 | |||
155 | /* get physical address of page table now too */ | ||
156 | movq PTR(PA_TABLE_PAGE)(%rsi), %rcx | ||
32 | 157 | ||
33 | /* store the parameters back on the stack */ | 158 | /* switch to new set of page tables */ |
34 | pushq %rdx /* store the start address */ | 159 | movq PTR(PA_PGD)(%rsi), %r9 |
160 | movq %r9, %cr3 | ||
161 | |||
162 | /* setup a new stack at the end of the physical control page */ | ||
163 | lea 4096(%r8), %rsp | ||
164 | |||
165 | /* jump to identity mapped page */ | ||
166 | addq $(identity_mapped - relocate_kernel), %r8 | ||
167 | pushq %r8 | ||
168 | ret | ||
169 | |||
170 | identity_mapped: | ||
171 | /* store the start address on the stack */ | ||
172 | pushq %rdx | ||
35 | 173 | ||
36 | /* Set cr0 to a known state: | 174 | /* Set cr0 to a known state: |
37 | * 31 1 == Paging enabled | 175 | * 31 1 == Paging enabled |
@@ -136,8 +274,3 @@ relocate_new_kernel: | |||
136 | xorq %r15, %r15 | 274 | xorq %r15, %r15 |
137 | 275 | ||
138 | ret | 276 | ret |
139 | relocate_new_kernel_end: | ||
140 | |||
141 | .globl relocate_new_kernel_size | ||
142 | relocate_new_kernel_size: | ||
143 | .quad relocate_new_kernel_end - relocate_new_kernel | ||
diff --git a/include/asm-x86_64/kexec.h b/include/asm-x86_64/kexec.h index c564bae03433..5fab957e1091 100644 --- a/include/asm-x86_64/kexec.h +++ b/include/asm-x86_64/kexec.h | |||
@@ -1,6 +1,27 @@ | |||
1 | #ifndef _X86_64_KEXEC_H | 1 | #ifndef _X86_64_KEXEC_H |
2 | #define _X86_64_KEXEC_H | 2 | #define _X86_64_KEXEC_H |
3 | 3 | ||
4 | #define PA_CONTROL_PAGE 0 | ||
5 | #define VA_CONTROL_PAGE 1 | ||
6 | #define PA_PGD 2 | ||
7 | #define VA_PGD 3 | ||
8 | #define PA_PUD_0 4 | ||
9 | #define VA_PUD_0 5 | ||
10 | #define PA_PMD_0 6 | ||
11 | #define VA_PMD_0 7 | ||
12 | #define PA_PTE_0 8 | ||
13 | #define VA_PTE_0 9 | ||
14 | #define PA_PUD_1 10 | ||
15 | #define VA_PUD_1 11 | ||
16 | #define PA_PMD_1 12 | ||
17 | #define VA_PMD_1 13 | ||
18 | #define PA_PTE_1 14 | ||
19 | #define VA_PTE_1 15 | ||
20 | #define PA_TABLE_PAGE 16 | ||
21 | #define PAGES_NR 17 | ||
22 | |||
23 | #ifndef __ASSEMBLY__ | ||
24 | |||
4 | #include <linux/string.h> | 25 | #include <linux/string.h> |
5 | 26 | ||
6 | #include <asm/page.h> | 27 | #include <asm/page.h> |
@@ -64,4 +85,12 @@ static inline void crash_setup_regs(struct pt_regs *newregs, | |||
64 | newregs->rip = (unsigned long)current_text_addr(); | 85 | newregs->rip = (unsigned long)current_text_addr(); |
65 | } | 86 | } |
66 | } | 87 | } |
88 | |||
89 | NORET_TYPE void | ||
90 | relocate_kernel(unsigned long indirection_page, | ||
91 | unsigned long page_list, | ||
92 | unsigned long start_address) ATTRIB_NORET; | ||
93 | |||
94 | #endif /* __ASSEMBLY__ */ | ||
95 | |||
67 | #endif /* _X86_64_KEXEC_H */ | 96 | #endif /* _X86_64_KEXEC_H */ |