Diffstat (limited to 'arch/i386/mm/fault.c')
 -rw-r--r--  arch/i386/mm/fault.c | 210
 1 file changed, 138 insertions(+), 72 deletions(-)
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index cf572d9a3b6e..7f0fcf219a26 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -214,6 +214,68 @@ static noinline void force_sig_info_fault(int si_signo, int si_code,
 
 fastcall void do_invalid_op(struct pt_regs *, unsigned long);
 
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+	unsigned index = pgd_index(address);
+	pgd_t *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+
+	pgd += index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd_k))
+		return NULL;
+
+	/*
+	 * set_pgd(pgd, *pgd_k); here would be useless on PAE
+	 * and redundant with the set_pmd() on non-PAE. As would
+	 * set_pud.
+	 */
+
+	pud = pud_offset(pgd, address);
+	pud_k = pud_offset(pgd_k, address);
+	if (!pud_present(*pud_k))
+		return NULL;
+
+	pmd = pmd_offset(pud, address);
+	pmd_k = pmd_offset(pud_k, address);
+	if (!pmd_present(*pmd_k))
+		return NULL;
+	if (!pmd_present(*pmd))
+		set_pmd(pmd, *pmd_k);
+	else
+		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+	return pmd_k;
+}
+
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * This assumes no large pages in there.
+ */
+static inline int vmalloc_fault(unsigned long address)
+{
+	unsigned long pgd_paddr;
+	pmd_t *pmd_k;
+	pte_t *pte_k;
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "current" here. We might be inside
+	 * an interrupt in the middle of a task switch..
+	 */
+	pgd_paddr = read_cr3();
+	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+	if (!pmd_k)
+		return -1;
+	pte_k = pte_offset_kernel(pmd_k, address);
+	if (!pte_present(*pte_k))
+		return -1;
+	return 0;
+}
+
 /*
  * This routine handles page faults. It determines the address,
  * and the problem, and then passes it off to one of the appropriate
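
Reviewer note (not part of the patch): vmalloc_sync_one() copies one
top-level entry from the kernel's reference page table (init_mm.pgd) into
the page table passed in, which is how i386 lazily propagates vmalloc and
module mappings into each process. A rough userspace analogue of this
"copy one top-level slot on demand" pattern -- the names and sizes below
are invented for illustration, this is not kernel API:

	#include <stdio.h>

	#define SLOTS 1024		/* stand-in for PTRS_PER_PGD */

	static long reference[SLOTS];	/* stand-in for init_mm.pgd */

	/* Copy one top-level slot from the reference table on demand. */
	static int sync_one(long *table, unsigned idx)
	{
		if (!reference[idx])
			return -1;	/* nothing to propagate yet */
		if (!table[idx])
			table[idx] = reference[idx];
		return 0;
	}

	int main(void)
	{
		long task_table[SLOTS] = { 0 };

		reference[7] = 0xabc;	/* "kernel" maps a new region */
		if (sync_one(task_table, 7) == 0)
			printf("slot 7 synced: %lx\n", task_table[7]);
		return 0;
	}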
@@ -223,6 +285,8 @@ fastcall void do_invalid_op(struct pt_regs *, unsigned long);
  * bit 0 == 0 means no page found, 1 means protection fault
  * bit 1 == 0 means read, 1 means write
  * bit 2 == 0 means kernel, 1 means user-mode
+ * bit 3 == 1 means use of reserved bit detected
+ * bit 4 == 1 means fault was an instruction fetch
  */
 fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 				      unsigned long error_code)
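
Reviewer note (not part of the patch): the error_code bits documented
above come straight from the x86 page-fault hardware error code, so they
can be decoded with plain bit tests. A minimal standalone sketch
(illustrative only, not kernel code):

	#include <stdio.h>

	/* Decode an i386 page-fault error code per the comment above. */
	static void decode_fault(unsigned long error_code)
	{
		printf("%s, %s, %s%s%s\n",
		       (error_code & 1) ? "protection fault" : "no page found",
		       (error_code & 2) ? "write" : "read",
		       (error_code & 4) ? "user-mode" : "kernel",
		       (error_code & 8) ? ", reserved bit" : "",
		       (error_code & 16) ? ", instruction fetch" : "");
	}

	int main(void)
	{
		decode_fault(0x0d);	/* the mask tested by the fast path below */
		return 0;
	}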
@@ -237,13 +301,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 	/* get the address */
 	address = read_cr2();
 
-	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-					SIGSEGV) == NOTIFY_STOP)
-		return;
-	/* It's safe to allow irq's after cr2 has been saved */
-	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
-		local_irq_enable();
-
 	tsk = current;
 
 	si_code = SEGV_MAPERR;
@@ -259,17 +316,29 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 	 *
 	 * This verifies that the fault happens in kernel space
 	 * (error_code & 4) == 0, and that the fault was not a
-	 * protection error (error_code & 1) == 0.
+	 * protection error (error_code & 9) == 0.
 	 */
 	if (unlikely(address >= TASK_SIZE)) {
-		if (!(error_code & 5))
-			goto vmalloc_fault;
+		if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
+			return;
+		if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+						SIGSEGV) == NOTIFY_STOP)
+			return;
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
 		 */
 		goto bad_area_nosemaphore;
 	}
+
+	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+					SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
+	   fault has been handled. */
+	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
+		local_irq_enable();
 
 	mm = tsk->mm;
 
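
Reviewer note (not part of the patch): the new mask decomposes as
0x0000000d == 0x1 | 0x4 | 0x8, so the vmalloc fast path is taken only for
a not-present fault (bit 0 clear) from kernel mode (bit 2 clear) with no
reserved-bit violation (bit 3 clear). Spelled out bit by bit, the test
above is equivalent to:

	/* illustration only -- same test as in the hunk above */
	if (!(error_code & (0x1 | 0x4 | 0x8)) && vmalloc_fault(address) >= 0)
		return;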
@@ -440,24 +509,31 @@ no_context:
 
 	bust_spinlocks(1);
 
-#ifdef CONFIG_X86_PAE
-	if (error_code & 16) {
-		pte_t *pte = lookup_address(address);
+	if (oops_may_print()) {
+#ifdef CONFIG_X86_PAE
+		if (error_code & 16) {
+			pte_t *pte = lookup_address(address);
 
-		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
-			printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
+			if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
+				printk(KERN_CRIT "kernel tried to execute "
+					"NX-protected page - exploit attempt? "
+					"(uid: %d)\n", current->uid);
+		}
+#endif
+		if (address < PAGE_SIZE)
+			printk(KERN_ALERT "BUG: unable to handle kernel NULL "
+					"pointer dereference");
+		else
+			printk(KERN_ALERT "BUG: unable to handle kernel paging"
+					" request");
+		printk(" at virtual address %08lx\n",address);
+		printk(KERN_ALERT " printing eip:\n");
+		printk("%08lx\n", regs->eip);
 	}
-#endif
-	if (address < PAGE_SIZE)
-		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
-	else
-		printk(KERN_ALERT "Unable to handle kernel paging request");
-	printk(" at virtual address %08lx\n",address);
-	printk(KERN_ALERT " printing eip:\n");
-	printk("%08lx\n", regs->eip);
 	page = read_cr3();
 	page = ((unsigned long *) __va(page))[address >> 22];
-	printk(KERN_ALERT "*pde = %08lx\n", page);
+	if (oops_may_print())
+		printk(KERN_ALERT "*pde = %08lx\n", page);
 	/*
 	 * We must not directly access the pte in the highpte
 	 * case, the page table might be allocated in highmem.
@@ -465,7 +541,7 @@ no_context:
 	 * it's allocated already.
 	 */
 #ifndef CONFIG_HIGHPTE
-	if (page & 1) {
+	if ((page & 1) && oops_may_print()) {
 		page &= PAGE_MASK;
 		address &= 0x003ff000;
 		page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
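
Reviewer note (not part of the patch): the shift and mask in this hunk
follow the two-level non-PAE i386 layout, where a 32-bit virtual address
splits into a 10-bit page-directory index, a 10-bit page-table index and a
12-bit page offset. A standalone sketch of the arithmetic (illustrative
only):

	#include <stdio.h>

	int main(void)
	{
		unsigned long address = 0xc0123456;

		printf("pde index: %lu\n", address >> 22);
		printf("pte index: %lu\n", (address & 0x003ff000) >> 12);
		printf("offset:    %lu\n", address & 0xfff);
		return 0;
	}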
@@ -510,51 +586,41 @@ do_sigbus:
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 14;
 	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
-	return;
-
-vmalloc_fault:
-	{
-		/*
-		 * Synchronize this task's top level page-table
-		 * with the 'reference' page table.
-		 *
-		 * Do _not_ use "tsk" here. We might be inside
-		 * an interrupt in the middle of a task switch..
-		 */
-		int index = pgd_index(address);
-		unsigned long pgd_paddr;
-		pgd_t *pgd, *pgd_k;
-		pud_t *pud, *pud_k;
-		pmd_t *pmd, *pmd_k;
-		pte_t *pte_k;
-
-		pgd_paddr = read_cr3();
-		pgd = index + (pgd_t *)__va(pgd_paddr);
-		pgd_k = init_mm.pgd + index;
-
-		if (!pgd_present(*pgd_k))
-			goto no_context;
-
-		/*
-		 * set_pgd(pgd, *pgd_k); here would be useless on PAE
-		 * and redundant with the set_pmd() on non-PAE. As would
-		 * set_pud.
-		 */
+}
 
-		pud = pud_offset(pgd, address);
-		pud_k = pud_offset(pgd_k, address);
-		if (!pud_present(*pud_k))
-			goto no_context;
-
-		pmd = pmd_offset(pud, address);
-		pmd_k = pmd_offset(pud_k, address);
-		if (!pmd_present(*pmd_k))
-			goto no_context;
-		set_pmd(pmd, *pmd_k);
+#ifndef CONFIG_X86_PAE
+void vmalloc_sync_all(void)
+{
+	/*
+	 * Note that races in the updates of insync and start aren't
+	 * problematic: insync can only get set bits added, and updates to
+	 * start are only improving performance (without affecting correctness
+	 * if undone).
+	 */
+	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+	static unsigned long start = TASK_SIZE;
+	unsigned long address;
 
-		pte_k = pte_offset_kernel(pmd_k, address);
-		if (!pte_present(*pte_k))
-			goto no_context;
-		return;
+	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
+	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+		if (!test_bit(pgd_index(address), insync)) {
+			unsigned long flags;
+			struct page *page;
+
+			spin_lock_irqsave(&pgd_lock, flags);
+			for (page = pgd_list; page; page =
+					(struct page *)page->index)
+				if (!vmalloc_sync_one(page_address(page),
+							address)) {
+					BUG_ON(page != pgd_list);
+					break;
+				}
+			spin_unlock_irqrestore(&pgd_lock, flags);
+			if (!page)
+				set_bit(pgd_index(address), insync);
+		}
+		if (address == start && test_bit(pgd_index(address), insync))
+			start = address + PGDIR_SIZE;
 	}
 }
+#endif
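
Reviewer note (not part of the patch): vmalloc_sync_all() lets code that
installs callbacks which may run in page-fault context push the shared
kernel mappings into every pgd up front, instead of relying on the lazy
vmalloc_fault() path. A hedged sketch of such a caller -- the helper and
the my_callback pointer are hypothetical, invented for illustration:

	/* Hypothetical registration helper: pre-sync every pgd so the
	 * callback can be reached at fault time even in an mm that has
	 * never taken a vmalloc fault. */
	static int (*my_callback)(unsigned long addr);

	int my_register_fault_callback(int (*cb)(unsigned long addr))
	{
		vmalloc_sync_all();
		my_callback = cb;
		return 0;
	}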