diff options
-rw-r--r-- | arch/x86/mm/fault_32.c | 116 | ||||
-rw-r--r-- | arch/x86/mm/fault_64.c | 148 |
2 files changed, 263 insertions, 1 deletions
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c index 28ea3d3ec8f8..7d9ecbbba745 100644 --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c | |||
@@ -173,8 +173,17 @@ static void force_sig_info_fault(int si_signo, int si_code, | |||
173 | force_sig_info(si_signo, &info, tsk); | 173 | force_sig_info(si_signo, &info, tsk); |
174 | } | 174 | } |
175 | 175 | ||
176 | #ifdef CONFIG_X86_64 | ||
177 | static int bad_address(void *p) | ||
178 | { | ||
179 | unsigned long dummy; | ||
180 | return probe_kernel_address((unsigned long *)p, dummy); | ||
181 | } | ||
182 | #endif | ||
183 | |||
176 | void dump_pagetable(unsigned long address) | 184 | void dump_pagetable(unsigned long address) |
177 | { | 185 | { |
186 | #ifdef CONFIG_X86_32 | ||
178 | __typeof__(pte_val(__pte(0))) page; | 187 | __typeof__(pte_val(__pte(0))) page; |
179 | 188 | ||
180 | page = read_cr3(); | 189 | page = read_cr3(); |
@@ -209,8 +218,42 @@ void dump_pagetable(unsigned long address) | |||
209 | } | 218 | } |
210 | 219 | ||
211 | printk("\n"); | 220 | printk("\n"); |
221 | #else /* CONFIG_X86_64 */ | ||
222 | pgd_t *pgd; | ||
223 | pud_t *pud; | ||
224 | pmd_t *pmd; | ||
225 | pte_t *pte; | ||
226 | |||
227 | pgd = (pgd_t *)read_cr3(); | ||
228 | |||
229 | pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); | ||
230 | pgd += pgd_index(address); | ||
231 | if (bad_address(pgd)) goto bad; | ||
232 | printk("PGD %lx ", pgd_val(*pgd)); | ||
233 | if (!pgd_present(*pgd)) goto ret; | ||
234 | |||
235 | pud = pud_offset(pgd, address); | ||
236 | if (bad_address(pud)) goto bad; | ||
237 | printk("PUD %lx ", pud_val(*pud)); | ||
238 | if (!pud_present(*pud)) goto ret; | ||
239 | |||
240 | pmd = pmd_offset(pud, address); | ||
241 | if (bad_address(pmd)) goto bad; | ||
242 | printk("PMD %lx ", pmd_val(*pmd)); | ||
243 | if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; | ||
244 | |||
245 | pte = pte_offset_kernel(pmd, address); | ||
246 | if (bad_address(pte)) goto bad; | ||
247 | printk("PTE %lx", pte_val(*pte)); | ||
248 | ret: | ||
249 | printk("\n"); | ||
250 | return; | ||
251 | bad: | ||
252 | printk("BAD\n"); | ||
253 | #endif | ||
212 | } | 254 | } |
213 | 255 | ||
256 | #ifdef CONFIG_X86_32 | ||
214 | static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | 257 | static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) |
215 | { | 258 | { |
216 | unsigned index = pgd_index(address); | 259 | unsigned index = pgd_index(address); |
@@ -246,6 +289,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | |||
246 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); | 289 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); |
247 | return pmd_k; | 290 | return pmd_k; |
248 | } | 291 | } |
292 | #endif | ||
249 | 293 | ||
250 | #ifdef CONFIG_X86_64 | 294 | #ifdef CONFIG_X86_64 |
251 | static const char errata93_warning[] = | 295 | static const char errata93_warning[] = |
@@ -326,6 +370,7 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) | |||
326 | static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, | 370 | static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, |
327 | unsigned long address) | 371 | unsigned long address) |
328 | { | 372 | { |
373 | #ifdef CONFIG_X86_32 | ||
329 | if (!oops_may_print()) | 374 | if (!oops_may_print()) |
330 | return; | 375 | return; |
331 | 376 | ||
@@ -350,8 +395,40 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, | |||
350 | printk(KERN_ALERT "IP:"); | 395 | printk(KERN_ALERT "IP:"); |
351 | printk_address(regs->ip, 1); | 396 | printk_address(regs->ip, 1); |
352 | dump_pagetable(address); | 397 | dump_pagetable(address); |
398 | #else /* CONFIG_X86_64 */ | ||
399 | printk(KERN_ALERT "BUG: unable to handle kernel "); | ||
400 | if (address < PAGE_SIZE) | ||
401 | printk(KERN_CONT "NULL pointer dereference"); | ||
402 | else | ||
403 | printk(KERN_CONT "paging request"); | ||
404 | printk(KERN_CONT " at %016lx\n", address); | ||
405 | |||
406 | printk(KERN_ALERT "IP:"); | ||
407 | printk_address(regs->ip, 1); | ||
408 | dump_pagetable(address); | ||
409 | #endif | ||
353 | } | 410 | } |
354 | 411 | ||
412 | #ifdef CONFIG_X86_64 | ||
413 | static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | ||
414 | unsigned long error_code) | ||
415 | { | ||
416 | unsigned long flags = oops_begin(); | ||
417 | struct task_struct *tsk; | ||
418 | |||
419 | printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", | ||
420 | current->comm, address); | ||
421 | dump_pagetable(address); | ||
422 | tsk = current; | ||
423 | tsk->thread.cr2 = address; | ||
424 | tsk->thread.trap_no = 14; | ||
425 | tsk->thread.error_code = error_code; | ||
426 | if (__die("Bad pagetable", regs, error_code)) | ||
427 | regs = NULL; | ||
428 | oops_end(flags, regs, SIGKILL); | ||
429 | } | ||
430 | #endif | ||
431 | |||
355 | /* | 432 | /* |
356 | * Handle a fault on the vmalloc or module mapping area | 433 | * Handle a fault on the vmalloc or module mapping area |
357 | * | 434 | * |
@@ -706,6 +783,7 @@ do_sigbus: | |||
706 | 783 | ||
707 | void vmalloc_sync_all(void) | 784 | void vmalloc_sync_all(void) |
708 | { | 785 | { |
786 | #ifdef CONFIG_X86_32 | ||
709 | /* | 787 | /* |
710 | * Note that races in the updates of insync and start aren't | 788 | * Note that races in the updates of insync and start aren't |
711 | * problematic: insync can only get set bits added, and updates to | 789 | * problematic: insync can only get set bits added, and updates to |
@@ -740,4 +818,42 @@ void vmalloc_sync_all(void) | |||
740 | if (address == start && test_bit(pgd_index(address), insync)) | 818 | if (address == start && test_bit(pgd_index(address), insync)) |
741 | start = address + PGDIR_SIZE; | 819 | start = address + PGDIR_SIZE; |
742 | } | 820 | } |
821 | #else /* CONFIG_X86_64 */ | ||
822 | /* | ||
823 | * Note that races in the updates of insync and start aren't | ||
824 | * problematic: insync can only get set bits added, and updates to | ||
825 | * start are only improving performance (without affecting correctness | ||
826 | * if undone). | ||
827 | */ | ||
828 | static DECLARE_BITMAP(insync, PTRS_PER_PGD); | ||
829 | static unsigned long start = VMALLOC_START & PGDIR_MASK; | ||
830 | unsigned long address; | ||
831 | |||
832 | for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { | ||
833 | if (!test_bit(pgd_index(address), insync)) { | ||
834 | const pgd_t *pgd_ref = pgd_offset_k(address); | ||
835 | struct page *page; | ||
836 | |||
837 | if (pgd_none(*pgd_ref)) | ||
838 | continue; | ||
839 | spin_lock(&pgd_lock); | ||
840 | list_for_each_entry(page, &pgd_list, lru) { | ||
841 | pgd_t *pgd; | ||
842 | pgd = (pgd_t *)page_address(page) + pgd_index(address); | ||
843 | if (pgd_none(*pgd)) | ||
844 | set_pgd(pgd, *pgd_ref); | ||
845 | else | ||
846 | BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); | ||
847 | } | ||
848 | spin_unlock(&pgd_lock); | ||
849 | set_bit(pgd_index(address), insync); | ||
850 | } | ||
851 | if (address == start) | ||
852 | start = address + PGDIR_SIZE; | ||
853 | } | ||
854 | /* Check that there is no need to do the same for the modules area. */ | ||
855 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); | ||
856 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == | ||
857 | (__START_KERNEL & PGDIR_MASK))); | ||
858 | #endif | ||
743 | } | 859 | } |
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index e12c34ba4ba1..edca689c62d5 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c | |||
@@ -176,14 +176,52 @@ static void force_sig_info_fault(int si_signo, int si_code, | |||
176 | force_sig_info(si_signo, &info, tsk); | 176 | force_sig_info(si_signo, &info, tsk); |
177 | } | 177 | } |
178 | 178 | ||
179 | #ifdef CONFIG_X86_64 | ||
179 | static int bad_address(void *p) | 180 | static int bad_address(void *p) |
180 | { | 181 | { |
181 | unsigned long dummy; | 182 | unsigned long dummy; |
182 | return probe_kernel_address((unsigned long *)p, dummy); | 183 | return probe_kernel_address((unsigned long *)p, dummy); |
183 | } | 184 | } |
185 | #endif | ||
184 | 186 | ||
185 | void dump_pagetable(unsigned long address) | 187 | void dump_pagetable(unsigned long address) |
186 | { | 188 | { |
189 | #ifdef CONFIG_X86_32 | ||
190 | __typeof__(pte_val(__pte(0))) page; | ||
191 | |||
192 | page = read_cr3(); | ||
193 | page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; | ||
194 | #ifdef CONFIG_X86_PAE | ||
195 | printk("*pdpt = %016Lx ", page); | ||
196 | if ((page >> PAGE_SHIFT) < max_low_pfn | ||
197 | && page & _PAGE_PRESENT) { | ||
198 | page &= PAGE_MASK; | ||
199 | page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) | ||
200 | & (PTRS_PER_PMD - 1)]; | ||
201 | printk(KERN_CONT "*pde = %016Lx ", page); | ||
202 | page &= ~_PAGE_NX; | ||
203 | } | ||
204 | #else | ||
205 | printk("*pde = %08lx ", page); | ||
206 | #endif | ||
207 | |||
208 | /* | ||
209 | * We must not directly access the pte in the highpte | ||
210 | * case if the page table is located in highmem. | ||
211 | * And let's rather not kmap-atomic the pte, just in case | ||
212 | * it's allocated already. | ||
213 | */ | ||
214 | if ((page >> PAGE_SHIFT) < max_low_pfn | ||
215 | && (page & _PAGE_PRESENT) | ||
216 | && !(page & _PAGE_PSE)) { | ||
217 | page &= PAGE_MASK; | ||
218 | page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) | ||
219 | & (PTRS_PER_PTE - 1)]; | ||
220 | printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); | ||
221 | } | ||
222 | |||
223 | printk("\n"); | ||
224 | #else /* CONFIG_X86_64 */ | ||
187 | pgd_t *pgd; | 225 | pgd_t *pgd; |
188 | pud_t *pud; | 226 | pud_t *pud; |
189 | pmd_t *pmd; | 227 | pmd_t *pmd; |
@@ -215,7 +253,46 @@ ret: | |||
215 | return; | 253 | return; |
216 | bad: | 254 | bad: |
217 | printk("BAD\n"); | 255 | printk("BAD\n"); |
256 | #endif | ||
257 | } | ||
258 | |||
259 | #ifdef CONFIG_X86_32 | ||
260 | static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | ||
261 | { | ||
262 | unsigned index = pgd_index(address); | ||
263 | pgd_t *pgd_k; | ||
264 | pud_t *pud, *pud_k; | ||
265 | pmd_t *pmd, *pmd_k; | ||
266 | |||
267 | pgd += index; | ||
268 | pgd_k = init_mm.pgd + index; | ||
269 | |||
270 | if (!pgd_present(*pgd_k)) | ||
271 | return NULL; | ||
272 | |||
273 | /* | ||
274 | * set_pgd(pgd, *pgd_k); here would be useless on PAE | ||
275 | * and redundant with the set_pmd() on non-PAE. As would | ||
276 | * set_pud. | ||
277 | */ | ||
278 | |||
279 | pud = pud_offset(pgd, address); | ||
280 | pud_k = pud_offset(pgd_k, address); | ||
281 | if (!pud_present(*pud_k)) | ||
282 | return NULL; | ||
283 | |||
284 | pmd = pmd_offset(pud, address); | ||
285 | pmd_k = pmd_offset(pud_k, address); | ||
286 | if (!pmd_present(*pmd_k)) | ||
287 | return NULL; | ||
288 | if (!pmd_present(*pmd)) { | ||
289 | set_pmd(pmd, *pmd_k); | ||
290 | arch_flush_lazy_mmu_mode(); | ||
291 | } else | ||
292 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); | ||
293 | return pmd_k; | ||
218 | } | 294 | } |
295 | #endif | ||
219 | 296 | ||
220 | #ifdef CONFIG_X86_64 | 297 | #ifdef CONFIG_X86_64 |
221 | static const char errata93_warning[] = | 298 | static const char errata93_warning[] = |
@@ -296,6 +373,32 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) | |||
296 | static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, | 373 | static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, |
297 | unsigned long address) | 374 | unsigned long address) |
298 | { | 375 | { |
376 | #ifdef CONFIG_X86_32 | ||
377 | if (!oops_may_print()) | ||
378 | return; | ||
379 | |||
380 | #ifdef CONFIG_X86_PAE | ||
381 | if (error_code & PF_INSTR) { | ||
382 | int level; | ||
383 | pte_t *pte = lookup_address(address, &level); | ||
384 | |||
385 | if (pte && pte_present(*pte) && !pte_exec(*pte)) | ||
386 | printk(KERN_CRIT "kernel tried to execute " | ||
387 | "NX-protected page - exploit attempt? " | ||
388 | "(uid: %d)\n", current->uid); | ||
389 | } | ||
390 | #endif | ||
391 | printk(KERN_ALERT "BUG: unable to handle kernel "); | ||
392 | if (address < PAGE_SIZE) | ||
393 | printk(KERN_CONT "NULL pointer dereference"); | ||
394 | else | ||
395 | printk(KERN_CONT "paging request"); | ||
396 | printk(KERN_CONT " at %08lx\n", address); | ||
397 | |||
398 | printk(KERN_ALERT "IP:"); | ||
399 | printk_address(regs->ip, 1); | ||
400 | dump_pagetable(address); | ||
401 | #else /* CONFIG_X86_64 */ | ||
299 | printk(KERN_ALERT "BUG: unable to handle kernel "); | 402 | printk(KERN_ALERT "BUG: unable to handle kernel "); |
300 | if (address < PAGE_SIZE) | 403 | if (address < PAGE_SIZE) |
301 | printk(KERN_CONT "NULL pointer dereference"); | 404 | printk(KERN_CONT "NULL pointer dereference"); |
@@ -306,8 +409,10 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, | |||
306 | printk(KERN_ALERT "IP:"); | 409 | printk(KERN_ALERT "IP:"); |
307 | printk_address(regs->ip, 1); | 410 | printk_address(regs->ip, 1); |
308 | dump_pagetable(address); | 411 | dump_pagetable(address); |
412 | #endif | ||
309 | } | 413 | } |
310 | 414 | ||
415 | #ifdef CONFIG_X86_64 | ||
311 | static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | 416 | static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, |
312 | unsigned long error_code) | 417 | unsigned long error_code) |
313 | { | 418 | { |
@@ -325,6 +430,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |||
325 | regs = NULL; | 430 | regs = NULL; |
326 | oops_end(flags, regs, SIGKILL); | 431 | oops_end(flags, regs, SIGKILL); |
327 | } | 432 | } |
433 | #endif | ||
328 | 434 | ||
329 | /* | 435 | /* |
330 | * Handle a fault on the vmalloc area | 436 | * Handle a fault on the vmalloc area |
@@ -590,12 +696,15 @@ bad_area: | |||
590 | bad_area_nosemaphore: | 696 | bad_area_nosemaphore: |
591 | /* User mode accesses just cause a SIGSEGV */ | 697 | /* User mode accesses just cause a SIGSEGV */ |
592 | if (error_code & PF_USER) { | 698 | if (error_code & PF_USER) { |
593 | |||
594 | /* | 699 | /* |
595 | * It's possible to have interrupts off here. | 700 | * It's possible to have interrupts off here. |
596 | */ | 701 | */ |
597 | local_irq_enable(); | 702 | local_irq_enable(); |
598 | 703 | ||
704 | /* | ||
705 | * Valid to do another page fault here because this one came | ||
706 | * from user space. | ||
707 | */ | ||
599 | if (is_prefetch(regs, address, error_code)) | 708 | if (is_prefetch(regs, address, error_code)) |
600 | return; | 709 | return; |
601 | 710 | ||
@@ -696,6 +805,42 @@ LIST_HEAD(pgd_list); | |||
696 | 805 | ||
697 | void vmalloc_sync_all(void) | 806 | void vmalloc_sync_all(void) |
698 | { | 807 | { |
808 | #ifdef CONFIG_X86_32 | ||
809 | /* | ||
810 | * Note that races in the updates of insync and start aren't | ||
811 | * problematic: insync can only get set bits added, and updates to | ||
812 | * start are only improving performance (without affecting correctness | ||
813 | * if undone). | ||
814 | */ | ||
815 | static DECLARE_BITMAP(insync, PTRS_PER_PGD); | ||
816 | static unsigned long start = TASK_SIZE; | ||
817 | unsigned long address; | ||
818 | |||
819 | if (SHARED_KERNEL_PMD) | ||
820 | return; | ||
821 | |||
822 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); | ||
823 | for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { | ||
824 | if (!test_bit(pgd_index(address), insync)) { | ||
825 | unsigned long flags; | ||
826 | struct page *page; | ||
827 | |||
828 | spin_lock_irqsave(&pgd_lock, flags); | ||
829 | for (page = pgd_list; page; page = | ||
830 | (struct page *)page->index) | ||
831 | if (!vmalloc_sync_one(page_address(page), | ||
832 | address)) { | ||
833 | BUG_ON(page != pgd_list); | ||
834 | break; | ||
835 | } | ||
836 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
837 | if (!page) | ||
838 | set_bit(pgd_index(address), insync); | ||
839 | } | ||
840 | if (address == start && test_bit(pgd_index(address), insync)) | ||
841 | start = address + PGDIR_SIZE; | ||
842 | } | ||
843 | #else /* CONFIG_X86_64 */ | ||
699 | /* | 844 | /* |
700 | * Note that races in the updates of insync and start aren't | 845 | * Note that races in the updates of insync and start aren't |
701 | * problematic: insync can only get set bits added, and updates to | 846 | * problematic: insync can only get set bits added, and updates to |
@@ -732,4 +877,5 @@ void vmalloc_sync_all(void) | |||
732 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); | 877 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); |
733 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == | 878 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == |
734 | (__START_KERNEL & PGDIR_MASK))); | 879 | (__START_KERNEL & PGDIR_MASK))); |
880 | #endif | ||
735 | } | 881 | } |