diff options
Diffstat (limited to 'arch/x86/mm/fault_32.c')
-rw-r--r-- | arch/x86/mm/fault_32.c | 100 |
1 files changed, 95 insertions, 5 deletions
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c index 7d9ecbbba745..4da4625c6968 100644 --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c | |||
@@ -48,7 +48,11 @@ static inline int notify_page_fault(struct pt_regs *regs) | |||
48 | int ret = 0; | 48 | int ret = 0; |
49 | 49 | ||
50 | /* kprobe_running() needs smp_processor_id() */ | 50 | /* kprobe_running() needs smp_processor_id() */ |
51 | #ifdef CONFIG_X86_32 | ||
51 | if (!user_mode_vm(regs)) { | 52 | if (!user_mode_vm(regs)) { |
53 | #else | ||
54 | if (!user_mode(regs)) { | ||
55 | #endif | ||
52 | preempt_disable(); | 56 | preempt_disable(); |
53 | if (kprobe_running() && kprobe_fault_handler(regs, 14)) | 57 | if (kprobe_running() && kprobe_fault_handler(regs, 14)) |
54 | ret = 1; | 58 | ret = 1; |
@@ -430,11 +434,15 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |||
430 | #endif | 434 | #endif |
431 | 435 | ||
432 | /* | 436 | /* |
437 | * X86_32 | ||
433 | * Handle a fault on the vmalloc or module mapping area | 438 | * Handle a fault on the vmalloc or module mapping area |
434 | * | 439 | * |
440 | * X86_64 | ||
441 | * Handle a fault on the vmalloc area | ||
442 | * | ||
435 | * This assumes no large pages in there. | 443 | * This assumes no large pages in there. |
436 | */ | 444 | */ |
437 | static inline int vmalloc_fault(unsigned long address) | 445 | static int vmalloc_fault(unsigned long address) |
438 | { | 446 | { |
439 | #ifdef CONFIG_X86_32 | 447 | #ifdef CONFIG_X86_32 |
440 | unsigned long pgd_paddr; | 448 | unsigned long pgd_paddr; |
@@ -509,6 +517,9 @@ int show_unhandled_signals = 1; | |||
509 | * and the problem, and then passes it off to one of the appropriate | 517 | * and the problem, and then passes it off to one of the appropriate |
510 | * routines. | 518 | * routines. |
511 | */ | 519 | */ |
520 | #ifdef CONFIG_X86_64 | ||
521 | asmlinkage | ||
522 | #endif | ||
512 | void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | 523 | void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) |
513 | { | 524 | { |
514 | struct task_struct *tsk; | 525 | struct task_struct *tsk; |
@@ -517,6 +528,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
517 | unsigned long address; | 528 | unsigned long address; |
518 | int write, si_code; | 529 | int write, si_code; |
519 | int fault; | 530 | int fault; |
531 | #ifdef CONFIG_X86_64 | ||
532 | unsigned long flags; | ||
533 | #endif | ||
520 | 534 | ||
521 | /* | 535 | /* |
522 | * We can fault from pretty much anywhere, with unknown IRQ state. | 536 | * We can fault from pretty much anywhere, with unknown IRQ state. |
@@ -548,6 +562,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
548 | * (error_code & 4) == 0, and that the fault was not a | 562 | * (error_code & 4) == 0, and that the fault was not a |
549 | * protection error (error_code & 9) == 0. | 563 | * protection error (error_code & 9) == 0. |
550 | */ | 564 | */ |
565 | #ifdef CONFIG_X86_32 | ||
551 | if (unlikely(address >= TASK_SIZE)) { | 566 | if (unlikely(address >= TASK_SIZE)) { |
552 | if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && | 567 | if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && |
553 | vmalloc_fault(address) >= 0) | 568 | vmalloc_fault(address) >= 0) |
@@ -570,7 +585,45 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
570 | */ | 585 | */ |
571 | if (in_atomic() || !mm) | 586 | if (in_atomic() || !mm) |
572 | goto bad_area_nosemaphore; | 587 | goto bad_area_nosemaphore; |
588 | #else /* CONFIG_X86_64 */ | ||
589 | if (unlikely(address >= TASK_SIZE64)) { | ||
590 | /* | ||
591 | * Don't check for the module range here: its PML4 | ||
592 | * is always initialized because it's shared with the main | ||
593 | * kernel text. Only vmalloc may need PML4 syncups. | ||
594 | */ | ||
595 | if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && | ||
596 | ((address >= VMALLOC_START && address < VMALLOC_END))) { | ||
597 | if (vmalloc_fault(address) >= 0) | ||
598 | return; | ||
599 | } | ||
600 | /* | ||
601 | * Don't take the mm semaphore here. If we fixup a prefetch | ||
602 | * fault we could otherwise deadlock. | ||
603 | */ | ||
604 | goto bad_area_nosemaphore; | ||
605 | } | ||
606 | if (likely(regs->flags & X86_EFLAGS_IF)) | ||
607 | local_irq_enable(); | ||
608 | |||
609 | if (unlikely(error_code & PF_RSVD)) | ||
610 | pgtable_bad(address, regs, error_code); | ||
611 | |||
612 | /* | ||
613 | * If we're in an interrupt, have no user context or are running in an | ||
614 | * atomic region then we must not take the fault. | ||
615 | */ | ||
616 | if (unlikely(in_atomic() || !mm)) | ||
617 | goto bad_area_nosemaphore; | ||
573 | 618 | ||
619 | /* | ||
620 | * User-mode registers count as a user access even for any | ||
621 | * potential system fault or CPU buglet. | ||
622 | */ | ||
623 | if (user_mode_vm(regs)) | ||
624 | error_code |= PF_USER; | ||
625 | again: | ||
626 | #endif | ||
574 | /* When running in the kernel we expect faults to occur only to | 627 | /* When running in the kernel we expect faults to occur only to |
575 | * addresses in user space. All other faults represent errors in the | 628 | * addresses in user space. All other faults represent errors in the |
576 | * kernel and should generate an OOPS. Unfortunately, in the case of an | 629 | * kernel and should generate an OOPS. Unfortunately, in the case of an |
@@ -596,7 +649,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
596 | vma = find_vma(mm, address); | 649 | vma = find_vma(mm, address); |
597 | if (!vma) | 650 | if (!vma) |
598 | goto bad_area; | 651 | goto bad_area; |
652 | #ifdef CONFIG_X86_32 | ||
599 | if (vma->vm_start <= address) | 653 | if (vma->vm_start <= address) |
654 | #else | ||
655 | if (likely(vma->vm_start <= address)) | ||
656 | #endif | ||
600 | goto good_area; | 657 | goto good_area; |
601 | if (!(vma->vm_flags & VM_GROWSDOWN)) | 658 | if (!(vma->vm_flags & VM_GROWSDOWN)) |
602 | goto bad_area; | 659 | goto bad_area; |
@@ -634,7 +691,9 @@ good_area: | |||
634 | goto bad_area; | 691 | goto bad_area; |
635 | } | 692 | } |
636 | 693 | ||
637 | survive: | 694 | #ifdef CONFIG_X86_32 |
695 | survive: | ||
696 | #endif | ||
638 | /* | 697 | /* |
639 | * If for any reason at all we couldn't handle the fault, | 698 | * If for any reason at all we couldn't handle the fault, |
640 | * make sure we exit gracefully rather than endlessly redo | 699 | * make sure we exit gracefully rather than endlessly redo |
@@ -705,6 +764,7 @@ bad_area_nosemaphore: | |||
705 | print_vma_addr(" in ", regs->ip); | 764 | print_vma_addr(" in ", regs->ip); |
706 | printk("\n"); | 765 | printk("\n"); |
707 | } | 766 | } |
767 | |||
708 | tsk->thread.cr2 = address; | 768 | tsk->thread.cr2 = address; |
709 | /* Kernel addresses are always protection faults */ | 769 | /* Kernel addresses are always protection faults */ |
710 | tsk->thread.error_code = error_code | (address >= TASK_SIZE); | 770 | tsk->thread.error_code = error_code | (address >= TASK_SIZE); |
@@ -722,9 +782,13 @@ no_context: | |||
722 | return; | 782 | return; |
723 | 783 | ||
724 | /* | 784 | /* |
785 | * X86_32 | ||
725 | * Valid to do another page fault here, because if this fault | 786 | * Valid to do another page fault here, because if this fault |
726 | * had been triggered by is_prefetch fixup_exception would have | 787 | * had been triggered by is_prefetch fixup_exception would have |
727 | * handled it. | 788 | * handled it. |
789 | * | ||
790 | * X86_64 | ||
791 | * Hall of shame of CPU/BIOS bugs. | ||
728 | */ | 792 | */ |
729 | if (is_prefetch(regs, address, error_code)) | 793 | if (is_prefetch(regs, address, error_code)) |
730 | return; | 794 | return; |
@@ -736,7 +800,7 @@ no_context: | |||
736 | * Oops. The kernel tried to access some bad page. We'll have to | 800 | * Oops. The kernel tried to access some bad page. We'll have to |
737 | * terminate things with extreme prejudice. | 801 | * terminate things with extreme prejudice. |
738 | */ | 802 | */ |
739 | 803 | #ifdef CONFIG_X86_32 | |
740 | bust_spinlocks(1); | 804 | bust_spinlocks(1); |
741 | 805 | ||
742 | show_fault_oops(regs, error_code, address); | 806 | show_fault_oops(regs, error_code, address); |
@@ -747,6 +811,20 @@ no_context: | |||
747 | die("Oops", regs, error_code); | 811 | die("Oops", regs, error_code); |
748 | bust_spinlocks(0); | 812 | bust_spinlocks(0); |
749 | do_exit(SIGKILL); | 813 | do_exit(SIGKILL); |
814 | #else /* CONFIG_X86_64 */ | ||
815 | flags = oops_begin(); | ||
816 | |||
817 | show_fault_oops(regs, error_code, address); | ||
818 | |||
819 | tsk->thread.cr2 = address; | ||
820 | tsk->thread.trap_no = 14; | ||
821 | tsk->thread.error_code = error_code; | ||
822 | if (__die("Oops", regs, error_code)) | ||
823 | regs = NULL; | ||
824 | /* Executive summary in case the body of the oops scrolled away */ | ||
825 | printk(KERN_EMERG "CR2: %016lx\n", address); | ||
826 | oops_end(flags, regs, SIGKILL); | ||
827 | #endif | ||
750 | 828 | ||
751 | /* | 829 | /* |
752 | * We ran out of memory, or some other thing happened to us that made | 830 | * We ran out of memory, or some other thing happened to us that made |
@@ -754,11 +832,18 @@ no_context: | |||
754 | */ | 832 | */ |
755 | out_of_memory: | 833 | out_of_memory: |
756 | up_read(&mm->mmap_sem); | 834 | up_read(&mm->mmap_sem); |
835 | #ifdef CONFIG_X86_32 | ||
757 | if (is_global_init(tsk)) { | 836 | if (is_global_init(tsk)) { |
758 | yield(); | 837 | yield(); |
759 | down_read(&mm->mmap_sem); | 838 | down_read(&mm->mmap_sem); |
760 | goto survive; | 839 | goto survive; |
761 | } | 840 | } |
841 | #else | ||
842 | if (is_global_init(current)) { | ||
843 | yield(); | ||
844 | goto again; | ||
845 | } | ||
846 | #endif | ||
762 | printk("VM: killing process %s\n", tsk->comm); | 847 | printk("VM: killing process %s\n", tsk->comm); |
763 | if (error_code & PF_USER) | 848 | if (error_code & PF_USER) |
764 | do_group_exit(SIGKILL); | 849 | do_group_exit(SIGKILL); |
@@ -770,17 +855,22 @@ do_sigbus: | |||
770 | /* Kernel mode? Handle exceptions or die */ | 855 | /* Kernel mode? Handle exceptions or die */ |
771 | if (!(error_code & PF_USER)) | 856 | if (!(error_code & PF_USER)) |
772 | goto no_context; | 857 | goto no_context; |
773 | 858 | #ifdef CONFIG_X86_32 | |
774 | /* User space => ok to do another page fault */ | 859 | /* User space => ok to do another page fault */ |
775 | if (is_prefetch(regs, address, error_code)) | 860 | if (is_prefetch(regs, address, error_code)) |
776 | return; | 861 | return; |
777 | 862 | #endif | |
778 | tsk->thread.cr2 = address; | 863 | tsk->thread.cr2 = address; |
779 | tsk->thread.error_code = error_code; | 864 | tsk->thread.error_code = error_code; |
780 | tsk->thread.trap_no = 14; | 865 | tsk->thread.trap_no = 14; |
781 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); | 866 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); |
782 | } | 867 | } |
783 | 868 | ||
869 | #ifdef CONFIG_X86_64 | ||
870 | DEFINE_SPINLOCK(pgd_lock); | ||
871 | LIST_HEAD(pgd_list); | ||
872 | #endif | ||
873 | |||
784 | void vmalloc_sync_all(void) | 874 | void vmalloc_sync_all(void) |
785 | { | 875 | { |
786 | #ifdef CONFIG_X86_32 | 876 | #ifdef CONFIG_X86_32 |