diff options
-rw-r--r-- | arch/i386/kernel/traps.c | 3 | ||||
-rw-r--r-- | arch/i386/mm/fault.c | 173 | ||||
-rw-r--r-- | include/asm-i386/pgtable-2level.h | 2 | ||||
-rw-r--r-- | include/asm-i386/pgtable-3level.h | 2 |
4 files changed, 123 insertions, 57 deletions
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index f20797b8da1d..d510de7e4f2a 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -99,6 +99,8 @@ int register_die_notifier(struct notifier_block *nb) | |||
99 | { | 99 | { |
100 | int err = 0; | 100 | int err = 0; |
101 | unsigned long flags; | 101 | unsigned long flags; |
102 | |||
103 | vmalloc_sync_all(); | ||
102 | spin_lock_irqsave(&die_notifier_lock, flags); | 104 | spin_lock_irqsave(&die_notifier_lock, flags); |
103 | err = notifier_chain_register(&i386die_chain, nb); | 105 | err = notifier_chain_register(&i386die_chain, nb); |
104 | spin_unlock_irqrestore(&die_notifier_lock, flags); | 106 | spin_unlock_irqrestore(&die_notifier_lock, flags); |
@@ -713,6 +715,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) | |||
713 | 715 | ||
714 | void set_nmi_callback(nmi_callback_t callback) | 716 | void set_nmi_callback(nmi_callback_t callback) |
715 | { | 717 | { |
718 | vmalloc_sync_all(); | ||
716 | rcu_assign_pointer(nmi_callback, callback); | 719 | rcu_assign_pointer(nmi_callback, callback); |
717 | } | 720 | } |
718 | EXPORT_SYMBOL_GPL(set_nmi_callback); | 721 | EXPORT_SYMBOL_GPL(set_nmi_callback); |
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index cf572d9a3b6e..bbb24af5d860 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c | |||
@@ -214,6 +214,68 @@ static noinline void force_sig_info_fault(int si_signo, int si_code, | |||
214 | 214 | ||
215 | fastcall void do_invalid_op(struct pt_regs *, unsigned long); | 215 | fastcall void do_invalid_op(struct pt_regs *, unsigned long); |
216 | 216 | ||
217 | static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | ||
218 | { | ||
219 | unsigned index = pgd_index(address); | ||
220 | pgd_t *pgd_k; | ||
221 | pud_t *pud, *pud_k; | ||
222 | pmd_t *pmd, *pmd_k; | ||
223 | |||
224 | pgd += index; | ||
225 | pgd_k = init_mm.pgd + index; | ||
226 | |||
227 | if (!pgd_present(*pgd_k)) | ||
228 | return NULL; | ||
229 | |||
230 | /* | ||
231 | * set_pgd(pgd, *pgd_k); here would be useless on PAE | ||
232 | * and redundant with the set_pmd() on non-PAE. As would | ||
233 | * set_pud. | ||
234 | */ | ||
235 | |||
236 | pud = pud_offset(pgd, address); | ||
237 | pud_k = pud_offset(pgd_k, address); | ||
238 | if (!pud_present(*pud_k)) | ||
239 | return NULL; | ||
240 | |||
241 | pmd = pmd_offset(pud, address); | ||
242 | pmd_k = pmd_offset(pud_k, address); | ||
243 | if (!pmd_present(*pmd_k)) | ||
244 | return NULL; | ||
245 | if (!pmd_present(*pmd)) | ||
246 | set_pmd(pmd, *pmd_k); | ||
247 | else | ||
248 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); | ||
249 | return pmd_k; | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * Handle a fault on the vmalloc or module mapping area | ||
254 | * | ||
255 | * This assumes no large pages in there. | ||
256 | */ | ||
257 | static inline int vmalloc_fault(unsigned long address) | ||
258 | { | ||
259 | unsigned long pgd_paddr; | ||
260 | pmd_t *pmd_k; | ||
261 | pte_t *pte_k; | ||
262 | /* | ||
263 | * Synchronize this task's top level page-table | ||
264 | * with the 'reference' page table. | ||
265 | * | ||
266 | * Do _not_ use "current" here. We might be inside | ||
267 | * an interrupt in the middle of a task switch.. | ||
268 | */ | ||
269 | pgd_paddr = read_cr3(); | ||
270 | pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); | ||
271 | if (!pmd_k) | ||
272 | return -1; | ||
273 | pte_k = pte_offset_kernel(pmd_k, address); | ||
274 | if (!pte_present(*pte_k)) | ||
275 | return -1; | ||
276 | return 0; | ||
277 | } | ||
278 | |||
217 | /* | 279 | /* |
218 | * This routine handles page faults. It determines the address, | 280 | * This routine handles page faults. It determines the address, |
219 | * and the problem, and then passes it off to one of the appropriate | 281 | * and the problem, and then passes it off to one of the appropriate |
@@ -223,6 +285,8 @@ fastcall void do_invalid_op(struct pt_regs *, unsigned long); | |||
223 | * bit 0 == 0 means no page found, 1 means protection fault | 285 | * bit 0 == 0 means no page found, 1 means protection fault |
224 | * bit 1 == 0 means read, 1 means write | 286 | * bit 1 == 0 means read, 1 means write |
225 | * bit 2 == 0 means kernel, 1 means user-mode | 287 | * bit 2 == 0 means kernel, 1 means user-mode |
288 | * bit 3 == 1 means use of reserved bit detected | ||
289 | * bit 4 == 1 means fault was an instruction fetch | ||
226 | */ | 290 | */ |
227 | fastcall void __kprobes do_page_fault(struct pt_regs *regs, | 291 | fastcall void __kprobes do_page_fault(struct pt_regs *regs, |
228 | unsigned long error_code) | 292 | unsigned long error_code) |
@@ -237,13 +301,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, | |||
237 | /* get the address */ | 301 | /* get the address */ |
238 | address = read_cr2(); | 302 | address = read_cr2(); |
239 | 303 | ||
240 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | ||
241 | SIGSEGV) == NOTIFY_STOP) | ||
242 | return; | ||
243 | /* It's safe to allow irq's after cr2 has been saved */ | ||
244 | if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) | ||
245 | local_irq_enable(); | ||
246 | |||
247 | tsk = current; | 304 | tsk = current; |
248 | 305 | ||
249 | si_code = SEGV_MAPERR; | 306 | si_code = SEGV_MAPERR; |
@@ -259,17 +316,29 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, | |||
259 | * | 316 | * |
260 | * This verifies that the fault happens in kernel space | 317 | * This verifies that the fault happens in kernel space |
261 | * (error_code & 4) == 0, and that the fault was not a | 318 | * (error_code & 4) == 0, and that the fault was not a |
262 | * protection error (error_code & 1) == 0. | 319 | * protection error or reserved-bit violation (error_code & 9) == 0. |
263 | */ | 320 | */ |
264 | if (unlikely(address >= TASK_SIZE)) { | 321 | if (unlikely(address >= TASK_SIZE)) { |
265 | if (!(error_code & 5)) | 322 | if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) |
266 | goto vmalloc_fault; | 323 | return; |
267 | /* | 324 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, |
325 | SIGSEGV) == NOTIFY_STOP) | ||
326 | return; | ||
327 | /* | ||
268 | * Don't take the mm semaphore here. If we fixup a prefetch | 328 | * Don't take the mm semaphore here. If we fixup a prefetch |
269 | * fault we could otherwise deadlock. | 329 | * fault we could otherwise deadlock. |
270 | */ | 330 | */ |
271 | goto bad_area_nosemaphore; | 331 | goto bad_area_nosemaphore; |
272 | } | 332 | } |
333 | |||
334 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | ||
335 | SIGSEGV) == NOTIFY_STOP) | ||
336 | return; | ||
337 | |||
338 | /* It's safe to allow irqs after cr2 has been saved and the vmalloc | ||
339 | fault has been handled. */ | ||
340 | if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) | ||
341 | local_irq_enable(); | ||
273 | 342 | ||
274 | mm = tsk->mm; | 343 | mm = tsk->mm; |
275 | 344 | ||
@@ -510,51 +579,41 @@ do_sigbus: | |||
510 | tsk->thread.error_code = error_code; | 579 | tsk->thread.error_code = error_code; |
511 | tsk->thread.trap_no = 14; | 580 | tsk->thread.trap_no = 14; |
512 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); | 581 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); |
513 | return; | 582 | } |
514 | |||
515 | vmalloc_fault: | ||
516 | { | ||
517 | /* | ||
518 | * Synchronize this task's top level page-table | ||
519 | * with the 'reference' page table. | ||
520 | * | ||
521 | * Do _not_ use "tsk" here. We might be inside | ||
522 | * an interrupt in the middle of a task switch.. | ||
523 | */ | ||
524 | int index = pgd_index(address); | ||
525 | unsigned long pgd_paddr; | ||
526 | pgd_t *pgd, *pgd_k; | ||
527 | pud_t *pud, *pud_k; | ||
528 | pmd_t *pmd, *pmd_k; | ||
529 | pte_t *pte_k; | ||
530 | |||
531 | pgd_paddr = read_cr3(); | ||
532 | pgd = index + (pgd_t *)__va(pgd_paddr); | ||
533 | pgd_k = init_mm.pgd + index; | ||
534 | |||
535 | if (!pgd_present(*pgd_k)) | ||
536 | goto no_context; | ||
537 | |||
538 | /* | ||
539 | * set_pgd(pgd, *pgd_k); here would be useless on PAE | ||
540 | * and redundant with the set_pmd() on non-PAE. As would | ||
541 | * set_pud. | ||
542 | */ | ||
543 | 583 | ||
544 | pud = pud_offset(pgd, address); | 584 | #ifndef CONFIG_X86_PAE |
545 | pud_k = pud_offset(pgd_k, address); | 585 | void vmalloc_sync_all(void) |
546 | if (!pud_present(*pud_k)) | 586 | { |
547 | goto no_context; | 587 | /* |
548 | 588 | * Note that races in the updates of insync and start aren't | |
549 | pmd = pmd_offset(pud, address); | 589 | * problematic: insync can only get set bits added, and updates to |
550 | pmd_k = pmd_offset(pud_k, address); | 590 | * start are only improving performance (without affecting correctness |
551 | if (!pmd_present(*pmd_k)) | 591 | * if undone). |
552 | goto no_context; | 592 | */ |
553 | set_pmd(pmd, *pmd_k); | 593 | static DECLARE_BITMAP(insync, PTRS_PER_PGD); |
594 | static unsigned long start = TASK_SIZE; | ||
595 | unsigned long address; | ||
554 | 596 | ||
555 | pte_k = pte_offset_kernel(pmd_k, address); | 597 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); |
556 | if (!pte_present(*pte_k)) | 598 | for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { |
557 | goto no_context; | 599 | if (!test_bit(pgd_index(address), insync)) { |
558 | return; | 600 | unsigned long flags; |
601 | struct page *page; | ||
602 | |||
603 | spin_lock_irqsave(&pgd_lock, flags); | ||
604 | for (page = pgd_list; page; page = | ||
605 | (struct page *)page->index) | ||
606 | if (!vmalloc_sync_one(page_address(page), | ||
607 | address)) { | ||
608 | BUG_ON(page != pgd_list); | ||
609 | break; | ||
610 | } | ||
611 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
612 | if (!page) | ||
613 | set_bit(pgd_index(address), insync); | ||
614 | } | ||
615 | if (address == start && test_bit(pgd_index(address), insync)) | ||
616 | start = address + PGDIR_SIZE; | ||
559 | } | 617 | } |
560 | } | 618 | } |
619 | #endif | ||
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 74ef721b534d..27bde973abc7 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h | |||
@@ -61,4 +61,6 @@ static inline int pte_exec_kernel(pte_t pte) | |||
61 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) | 61 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) |
62 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) | 62 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) |
63 | 63 | ||
64 | void vmalloc_sync_all(void); | ||
65 | |||
64 | #endif /* _I386_PGTABLE_2LEVEL_H */ | 66 | #endif /* _I386_PGTABLE_2LEVEL_H */ |
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index f1a8b454920a..36a5aa63cbbf 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h | |||
@@ -152,4 +152,6 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) | |||
152 | 152 | ||
153 | #define __pmd_free_tlb(tlb, x) do { } while (0) | 153 | #define __pmd_free_tlb(tlb, x) do { } while (0) |
154 | 154 | ||
155 | #define vmalloc_sync_all() ((void)0) | ||
156 | |||
155 | #endif /* _I386_PGTABLE_3LEVEL_H */ | 157 | #endif /* _I386_PGTABLE_3LEVEL_H */ |