author     Magnus Damm <magnus@valinux.co.jp>      2006-09-26 04:52:38 -0400
committer  Andi Kleen <andi@basil.nowhere.org>     2006-09-26 04:52:38 -0400
commit     3566561bfadffcb5dbc85d576be80c0dbf2cccc9 (patch)
tree       eaafaf68156f9f2f1a2af8a7ffed95653fea8aa5 /arch/i386/kernel
parent     4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f (diff)
[PATCH] i386: Avoid overwriting the current pgd (V4, i386)
kexec: Avoid overwriting the current pgd (V4, i386)
This patch upgrades the i386-specific kexec code to avoid overwriting the
current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used
to start a secondary kernel that dumps the memory of the previous kernel.
The code introduces a new set of page tables. These tables are used to provide
an executable identity mapping without overwriting the current pgd.
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--  arch/i386/kernel/machine_kexec.c   | 117
-rw-r--r--  arch/i386/kernel/relocate_kernel.S | 162
2 files changed, 174 insertions(+), 105 deletions(-)
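Note: the new machine_kexec() below fills a page_list[] array indexed by PA_*/VA_* constants and sized by PAGES_NR. Those definitions come from include/asm-i386/kexec.h, which is outside this diffstat (limited to arch/i386/kernel). A minimal sketch of what that header presumably provides, with the exact values assumed rather than taken from this diff:

/* Sketch only: assumed index layout for page_list[]; the real values
 * live in include/asm-i386/kexec.h, which this diffstat does not show. */
#define PA_CONTROL_PAGE  0
#define VA_CONTROL_PAGE  1
#define PA_PGD           2
#define VA_PGD           3
#define PA_PTE_0         4
#define VA_PTE_0         5
#define PA_PTE_1         6
#define VA_PTE_1         7
#ifdef CONFIG_X86_PAE
#define PA_PMD_0         8
#define VA_PMD_0         9
#define PA_PMD_1        10
#define VA_PMD_1        11
#define PAGES_NR        12
#else
#define PAGES_NR         8
#endif

relocate_kernel.S reads the same array through PTR(x), defined below as (x << 2), i.e. one 32-bit word per entry.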
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
index 66c3dc99a655..91966bafb3dc 100644
--- a/arch/i386/kernel/machine_kexec.c
+++ b/arch/i386/kernel/machine_kexec.c
@@ -21,70 +21,13 @@
 #include <asm/system.h>
 
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-
-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L2_ATTR (_PAGE_PRESENT)
-
-#define LEVEL0_SIZE (1UL << 12UL)
-
-#ifndef CONFIG_X86_PAE
-#define LEVEL1_SIZE (1UL << 22UL)
-static u32 pgtable_level1[1024] PAGE_ALIGNED;
-
-static void identity_map_page(unsigned long address)
-{
-	unsigned long level1_index, level2_index;
-	u32 *pgtable_level2;
-
-	/* Find the current page table */
-	pgtable_level2 = __va(read_cr3());
-
-	/* Find the indexes of the physical address to identity map */
-	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-	level2_index = address / LEVEL1_SIZE;
-
-	/* Identity map the page table entry */
-	pgtable_level1[level1_index] = address | L0_ATTR;
-	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-
-	/* Flush the tlb so the new mapping takes effect.
-	 * Global tlb entries are not flushed but that is not an issue.
-	 */
-	load_cr3(pgtable_level2);
-}
-
-#else
-#define LEVEL1_SIZE (1UL << 21UL)
-#define LEVEL2_SIZE (1UL << 30UL)
-static u64 pgtable_level1[512] PAGE_ALIGNED;
-static u64 pgtable_level2[512] PAGE_ALIGNED;
-
-static void identity_map_page(unsigned long address)
-{
-	unsigned long level1_index, level2_index, level3_index;
-	u64 *pgtable_level3;
-
-	/* Find the current page table */
-	pgtable_level3 = __va(read_cr3());
-
-	/* Find the indexes of the physical address to identity map */
-	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-	level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
-	level3_index = address / LEVEL2_SIZE;
-
-	/* Identity map the page table entry */
-	pgtable_level1[level1_index] = address | L0_ATTR;
-	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-	set_64bit(&pgtable_level3[level3_index],
-		  __pa(pgtable_level2) | L2_ATTR);
-
-	/* Flush the tlb so the new mapping takes effect.
-	 * Global tlb entries are not flushed but that is not an issue.
-	 */
-	load_cr3(pgtable_level3);
-}
-#endif
+static u32 kexec_pgd[1024] PAGE_ALIGNED;
+#ifdef CONFIG_X86_PAE
+static u32 kexec_pmd0[1024] PAGE_ALIGNED;
+static u32 kexec_pmd1[1024] PAGE_ALIGNED;
+#endif
+static u32 kexec_pte0[1024] PAGE_ALIGNED;
+static u32 kexec_pte1[1024] PAGE_ALIGNED;
 
 static void set_idt(void *newidt, __u16 limit)
 {
@@ -128,16 +71,6 @@ static void load_segments(void)
 #undef __STR
 }
 
-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
-					unsigned long indirection_page,
-					unsigned long reboot_code_buffer,
-					unsigned long start_address,
-					unsigned int has_pae) ATTRIB_NORET;
-
-extern const unsigned char relocate_new_kernel[];
-extern void relocate_new_kernel_end(void);
-extern const unsigned int relocate_new_kernel_size;
-
 /*
  * A architecture hook called to validate the
  * proposed image and prepare the control pages
@@ -170,25 +103,29 @@ void machine_kexec_cleanup(struct kimage *image)
  */
 NORET_TYPE void machine_kexec(struct kimage *image)
 {
-	unsigned long page_list;
-	unsigned long reboot_code_buffer;
-
-	relocate_new_kernel_t rnk;
+	unsigned long page_list[PAGES_NR];
+	void *control_page;
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
-	/* Compute some offsets */
-	reboot_code_buffer = page_to_pfn(image->control_code_page)
-								<< PAGE_SHIFT;
-	page_list = image->head;
+	control_page = page_address(image->control_code_page);
+	memcpy(control_page, relocate_kernel, PAGE_SIZE);
 
-	/* Set up an identity mapping for the reboot_code_buffer */
-	identity_map_page(reboot_code_buffer);
-
-	/* copy it out */
-	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
-						relocate_new_kernel_size);
+	page_list[PA_CONTROL_PAGE] = __pa(control_page);
+	page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
+	page_list[PA_PGD] = __pa(kexec_pgd);
+	page_list[VA_PGD] = (unsigned long)kexec_pgd;
+#ifdef CONFIG_X86_PAE
+	page_list[PA_PMD_0] = __pa(kexec_pmd0);
+	page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
+	page_list[PA_PMD_1] = __pa(kexec_pmd1);
+	page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
+#endif
+	page_list[PA_PTE_0] = __pa(kexec_pte0);
+	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
+	page_list[PA_PTE_1] = __pa(kexec_pte1);
+	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
 
 	/* The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
@@ -207,8 +144,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	set_idt(phys_to_virt(0),0);
 
 	/* now call it */
-	rnk = (relocate_new_kernel_t) reboot_code_buffer;
-	(*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
+	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+			image->start, cpu_has_pae);
 }
 
 /* crashkernel=size@addr specifies the location to reserve for
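Note: with the relocate_new_kernel_t typedef and the relocate_new_kernel_* externs removed, machine_kexec() now calls relocate_kernel() directly as a C function whose body is copied into the control page. Its prototype is presumably declared in the companion include/asm-i386/kexec.h change, which this diffstat does not show; a hedged sketch matching the four-argument call site above:

/* Assumed declaration; not part of this diffstat. */
asmlinkage NORET_TYPE void relocate_kernel(unsigned long indirection_page,
					   unsigned long page_list,
					   unsigned long start_address,
					   unsigned int has_pae) ATTRIB_NORET;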
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
index d312616effa1..f151d6fae462 100644
--- a/arch/i386/kernel/relocate_kernel.S
+++ b/arch/i386/kernel/relocate_kernel.S
@@ -7,16 +7,138 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/kexec.h>
+
+/*
+ * Must be relocatable PIC code callable as a C function
+ */
+
+#define PTR(x) (x << 2)
+#define PAGE_ALIGNED (1 << PAGE_SHIFT)
+#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
+#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
+
+	.text
+	.align PAGE_ALIGNED
+	.globl relocate_kernel
+relocate_kernel:
+	movl	8(%esp), %ebp /* list of pages */
+
+#ifdef CONFIG_X86_PAE
+	/* map the control page at its virtual address */
+
+	movl	PTR(VA_PGD)(%ebp), %edi
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0xc0000000, %eax
+	shrl	$27, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PMD_0)(%ebp), %edx
+	orl	$PAE_PGD_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PMD_0)(%ebp), %edi
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x3fe00000, %eax
+	shrl	$18, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PTE_0)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PTE_0)(%ebp), %edi
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x001ff000, %eax
+	shrl	$9, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	/* identity map the control page at its physical address */
+
+	movl	PTR(VA_PGD)(%ebp), %edi
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0xc0000000, %eax
+	shrl	$27, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PMD_1)(%ebp), %edx
+	orl	$PAE_PGD_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PMD_1)(%ebp), %edi
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x3fe00000, %eax
+	shrl	$18, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PTE_1)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PTE_1)(%ebp), %edi
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x001ff000, %eax
+	shrl	$9, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+#else
+	/* map the control page at its virtual address */
+
+	movl	PTR(VA_PGD)(%ebp), %edi
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0xffc00000, %eax
+	shrl	$20, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PTE_0)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PTE_0)(%ebp), %edi
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x003ff000, %eax
+	shrl	$10, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	/* identity map the control page at its physical address */
+
+	movl	PTR(VA_PGD)(%ebp), %edi
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0xffc00000, %eax
+	shrl	$20, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PTE_1)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PTE_1)(%ebp), %edi
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x003ff000, %eax
+	shrl	$10, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+#endif
 
-/*
- * Must be relocatable PIC code callable as a C function, that once
- * it starts can not use the previous processes stack.
- */
-.globl relocate_new_kernel
 relocate_new_kernel:
 	/* read the arguments and say goodbye to the stack */
 	movl  4(%esp), %ebx /* page_list */
-	movl  8(%esp), %ebp /* reboot_code_buffer */
+	movl  8(%esp), %ebp /* list of pages */
 	movl  12(%esp), %edx /* start address */
 	movl  16(%esp), %ecx /* cpu_has_pae */
 
@@ -24,11 +146,26 @@ relocate_new_kernel:
 	pushl $0
 	popfl
 
-	/* set a new stack at the bottom of our page... */
-	lea   4096(%ebp), %esp
+	/* get physical address of control page now */
+	/* this is impossible after page table switch */
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edi
 
-	/* store the parameters back on the stack */
-	pushl   %edx /* store the start address */
+	/* switch to new set of page tables */
+	movl	PTR(PA_PGD)(%ebp), %eax
+	movl	%eax, %cr3
+
+	/* setup a new stack at the end of the physical control page */
+	lea	4096(%edi), %esp
+
+	/* jump to identity mapped page */
+	movl	%edi, %eax
+	addl	$(identity_mapped - relocate_kernel), %eax
+	pushl	%eax
+	ret
+
+identity_mapped:
+	/* store the start address on the stack */
+	pushl   %edx
 
 	/* Set cr0 to a known state:
 	 * 31 0 == Paging disabled
@@ -113,8 +250,3 @@ relocate_new_kernel:
 	xorl    %edi, %edi
 	xorl    %ebp, %ebp
 	ret
-relocate_new_kernel_end:
-
-	.globl relocate_new_kernel_size
-relocate_new_kernel_size:
-	.long relocate_new_kernel_end - relocate_new_kernel
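Note on the mapping arithmetic above: each andl/shrl pair turns the control page's address into a byte offset into the corresponding table. With 8-byte PAE entries, (addr & 0xc0000000) >> 27 is the pgd index scaled by 8, (addr & 0x3fe00000) >> 18 the pmd index scaled by 8, and (addr & 0x001ff000) >> 9 the pte index scaled by 8; with 4-byte non-PAE entries the analogous masks are shifted by 20 and 10. An illustrative C restatement of the same arithmetic (the helper names are ours, not from the patch):

#include <stdint.h>

/* Byte offset of the entry covering 'addr' in each table, mirroring the
 * andl/shrl pairs in relocate_kernel.S above.  Illustrative sketch only. */
#ifdef CONFIG_X86_PAE
static inline uint32_t pgd_offset_bytes(uint32_t addr) { return (addr & 0xc0000000u) >> 27; } /* (addr >> 30) * 8 */
static inline uint32_t pmd_offset_bytes(uint32_t addr) { return (addr & 0x3fe00000u) >> 18; } /* ((addr >> 21) & 0x1ff) * 8 */
static inline uint32_t pte_offset_bytes(uint32_t addr) { return (addr & 0x001ff000u) >> 9;  } /* ((addr >> 12) & 0x1ff) * 8 */
#else
static inline uint32_t pgd_offset_bytes(uint32_t addr) { return (addr & 0xffc00000u) >> 20; } /* (addr >> 22) * 4 */
static inline uint32_t pte_offset_bytes(uint32_t addr) { return (addr & 0x003ff000u) >> 10; } /* ((addr >> 12) & 0x3ff) * 4 */
#endif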