aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/mm/fault_32.c100
-rw-r--r--arch/x86/mm/fault_64.c93
2 files changed, 177 insertions, 16 deletions
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 7d9ecbbba745..4da4625c6968 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -48,7 +48,11 @@ static inline int notify_page_fault(struct pt_regs *regs)
48 int ret = 0; 48 int ret = 0;
49 49
50 /* kprobe_running() needs smp_processor_id() */ 50 /* kprobe_running() needs smp_processor_id() */
51#ifdef CONFIG_X86_32
51 if (!user_mode_vm(regs)) { 52 if (!user_mode_vm(regs)) {
53#else
54 if (!user_mode(regs)) {
55#endif
52 preempt_disable(); 56 preempt_disable();
53 if (kprobe_running() && kprobe_fault_handler(regs, 14)) 57 if (kprobe_running() && kprobe_fault_handler(regs, 14))
54 ret = 1; 58 ret = 1;
@@ -430,11 +434,15 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
430#endif 434#endif
431 435
432/* 436/*
437 * X86_32
433 * Handle a fault on the vmalloc or module mapping area 438 * Handle a fault on the vmalloc or module mapping area
434 * 439 *
440 * X86_64
441 * Handle a fault on the vmalloc area
442 *
435 * This assumes no large pages in there. 443 * This assumes no large pages in there.
436 */ 444 */
437static inline int vmalloc_fault(unsigned long address) 445static int vmalloc_fault(unsigned long address)
438{ 446{
439#ifdef CONFIG_X86_32 447#ifdef CONFIG_X86_32
440 unsigned long pgd_paddr; 448 unsigned long pgd_paddr;
@@ -509,6 +517,9 @@ int show_unhandled_signals = 1;
509 * and the problem, and then passes it off to one of the appropriate 517 * and the problem, and then passes it off to one of the appropriate
510 * routines. 518 * routines.
511 */ 519 */
520#ifdef CONFIG_X86_64
521asmlinkage
522#endif
512void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) 523void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
513{ 524{
514 struct task_struct *tsk; 525 struct task_struct *tsk;
@@ -517,6 +528,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
517 unsigned long address; 528 unsigned long address;
518 int write, si_code; 529 int write, si_code;
519 int fault; 530 int fault;
531#ifdef CONFIG_X86_64
532 unsigned long flags;
533#endif
520 534
521 /* 535 /*
522 * We can fault from pretty much anywhere, with unknown IRQ state. 536 * We can fault from pretty much anywhere, with unknown IRQ state.
@@ -548,6 +562,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
548 * (error_code & 4) == 0, and that the fault was not a 562 * (error_code & 4) == 0, and that the fault was not a
549 * protection error (error_code & 9) == 0. 563 * protection error (error_code & 9) == 0.
550 */ 564 */
565#ifdef CONFIG_X86_32
551 if (unlikely(address >= TASK_SIZE)) { 566 if (unlikely(address >= TASK_SIZE)) {
552 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && 567 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
553 vmalloc_fault(address) >= 0) 568 vmalloc_fault(address) >= 0)
@@ -570,7 +585,45 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
570 */ 585 */
571 if (in_atomic() || !mm) 586 if (in_atomic() || !mm)
572 goto bad_area_nosemaphore; 587 goto bad_area_nosemaphore;
588#else /* CONFIG_X86_64 */
589 if (unlikely(address >= TASK_SIZE64)) {
590 /*
591 * Don't check for the module range here: its PML4
592 * is always initialized because it's shared with the main
593 * kernel text. Only vmalloc may need PML4 syncups.
594 */
595 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
596 ((address >= VMALLOC_START && address < VMALLOC_END))) {
597 if (vmalloc_fault(address) >= 0)
598 return;
599 }
600 /*
601 * Don't take the mm semaphore here. If we fixup a prefetch
602 * fault we could otherwise deadlock.
603 */
604 goto bad_area_nosemaphore;
605 }
606 if (likely(regs->flags & X86_EFLAGS_IF))
607 local_irq_enable();
608
609 if (unlikely(error_code & PF_RSVD))
610 pgtable_bad(address, regs, error_code);
611
612 /*
613 * If we're in an interrupt, have no user context or are running in an
614 * atomic region then we must not take the fault.
615 */
616 if (unlikely(in_atomic() || !mm))
617 goto bad_area_nosemaphore;
573 618
619 /*
620 * User-mode registers count as a user access even for any
621 * potential system fault or CPU buglet.
622 */
623 if (user_mode_vm(regs))
624 error_code |= PF_USER;
625again:
626#endif
574 /* When running in the kernel we expect faults to occur only to 627 /* When running in the kernel we expect faults to occur only to
575 * addresses in user space. All other faults represent errors in the 628 * addresses in user space. All other faults represent errors in the
576 * kernel and should generate an OOPS. Unfortunately, in the case of an 629 * kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -596,7 +649,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
596 vma = find_vma(mm, address); 649 vma = find_vma(mm, address);
597 if (!vma) 650 if (!vma)
598 goto bad_area; 651 goto bad_area;
652#ifdef CONFIG_X86_32
599 if (vma->vm_start <= address) 653 if (vma->vm_start <= address)
654#else
655 if (likely(vma->vm_start <= address))
656#endif
600 goto good_area; 657 goto good_area;
601 if (!(vma->vm_flags & VM_GROWSDOWN)) 658 if (!(vma->vm_flags & VM_GROWSDOWN))
602 goto bad_area; 659 goto bad_area;
@@ -634,7 +691,9 @@ good_area:
634 goto bad_area; 691 goto bad_area;
635 } 692 }
636 693
637 survive: 694#ifdef CONFIG_X86_32
695survive:
696#endif
638 /* 697 /*
639 * If for any reason at all we couldn't handle the fault, 698 * If for any reason at all we couldn't handle the fault,
640 * make sure we exit gracefully rather than endlessly redo 699 * make sure we exit gracefully rather than endlessly redo
@@ -705,6 +764,7 @@ bad_area_nosemaphore:
705 print_vma_addr(" in ", regs->ip); 764 print_vma_addr(" in ", regs->ip);
706 printk("\n"); 765 printk("\n");
707 } 766 }
767
708 tsk->thread.cr2 = address; 768 tsk->thread.cr2 = address;
709 /* Kernel addresses are always protection faults */ 769 /* Kernel addresses are always protection faults */
710 tsk->thread.error_code = error_code | (address >= TASK_SIZE); 770 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
@@ -722,9 +782,13 @@ no_context:
722 return; 782 return;
723 783
724 /* 784 /*
785 * X86_32
725 * Valid to do another page fault here, because if this fault 786 * Valid to do another page fault here, because if this fault
726 * had been triggered by is_prefetch fixup_exception would have 787 * had been triggered by is_prefetch fixup_exception would have
727 * handled it. 788 * handled it.
789 *
790 * X86_64
791 * Hall of shame of CPU/BIOS bugs.
728 */ 792 */
729 if (is_prefetch(regs, address, error_code)) 793 if (is_prefetch(regs, address, error_code))
730 return; 794 return;
@@ -736,7 +800,7 @@ no_context:
736 * Oops. The kernel tried to access some bad page. We'll have to 800 * Oops. The kernel tried to access some bad page. We'll have to
737 * terminate things with extreme prejudice. 801 * terminate things with extreme prejudice.
738 */ 802 */
739 803#ifdef CONFIG_X86_32
740 bust_spinlocks(1); 804 bust_spinlocks(1);
741 805
742 show_fault_oops(regs, error_code, address); 806 show_fault_oops(regs, error_code, address);
@@ -747,6 +811,20 @@ no_context:
747 die("Oops", regs, error_code); 811 die("Oops", regs, error_code);
748 bust_spinlocks(0); 812 bust_spinlocks(0);
749 do_exit(SIGKILL); 813 do_exit(SIGKILL);
814#else /* CONFIG_X86_64 */
815 flags = oops_begin();
816
817 show_fault_oops(regs, error_code, address);
818
819 tsk->thread.cr2 = address;
820 tsk->thread.trap_no = 14;
821 tsk->thread.error_code = error_code;
822 if (__die("Oops", regs, error_code))
823 regs = NULL;
824 /* Executive summary in case the body of the oops scrolled away */
825 printk(KERN_EMERG "CR2: %016lx\n", address);
826 oops_end(flags, regs, SIGKILL);
827#endif
750 828
751/* 829/*
752 * We ran out of memory, or some other thing happened to us that made 830 * We ran out of memory, or some other thing happened to us that made
@@ -754,11 +832,18 @@ no_context:
754 */ 832 */
755out_of_memory: 833out_of_memory:
756 up_read(&mm->mmap_sem); 834 up_read(&mm->mmap_sem);
835#ifdef CONFIG_X86_32
757 if (is_global_init(tsk)) { 836 if (is_global_init(tsk)) {
758 yield(); 837 yield();
759 down_read(&mm->mmap_sem); 838 down_read(&mm->mmap_sem);
760 goto survive; 839 goto survive;
761 } 840 }
841#else
842 if (is_global_init(current)) {
843 yield();
844 goto again;
845 }
846#endif
762 printk("VM: killing process %s\n", tsk->comm); 847 printk("VM: killing process %s\n", tsk->comm);
763 if (error_code & PF_USER) 848 if (error_code & PF_USER)
764 do_group_exit(SIGKILL); 849 do_group_exit(SIGKILL);
@@ -770,17 +855,22 @@ do_sigbus:
770 /* Kernel mode? Handle exceptions or die */ 855 /* Kernel mode? Handle exceptions or die */
771 if (!(error_code & PF_USER)) 856 if (!(error_code & PF_USER))
772 goto no_context; 857 goto no_context;
773 858#ifdef CONFIG_X86_32
774 /* User space => ok to do another page fault */ 859 /* User space => ok to do another page fault */
775 if (is_prefetch(regs, address, error_code)) 860 if (is_prefetch(regs, address, error_code))
776 return; 861 return;
777 862#endif
778 tsk->thread.cr2 = address; 863 tsk->thread.cr2 = address;
779 tsk->thread.error_code = error_code; 864 tsk->thread.error_code = error_code;
780 tsk->thread.trap_no = 14; 865 tsk->thread.trap_no = 14;
781 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); 866 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
782} 867}
783 868
869#ifdef CONFIG_X86_64
870DEFINE_SPINLOCK(pgd_lock);
871LIST_HEAD(pgd_list);
872#endif
873
784void vmalloc_sync_all(void) 874void vmalloc_sync_all(void)
785{ 875{
786#ifdef CONFIG_X86_32 876#ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index edca689c62d5..0902719388bc 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -51,7 +51,11 @@ static inline int notify_page_fault(struct pt_regs *regs)
51 int ret = 0; 51 int ret = 0;
52 52
53 /* kprobe_running() needs smp_processor_id() */ 53 /* kprobe_running() needs smp_processor_id() */
54#ifdef CONFIG_X86_32
55 if (!user_mode_vm(regs)) {
56#else
54 if (!user_mode(regs)) { 57 if (!user_mode(regs)) {
58#endif
55 preempt_disable(); 59 preempt_disable();
56 if (kprobe_running() && kprobe_fault_handler(regs, 14)) 60 if (kprobe_running() && kprobe_fault_handler(regs, 14))
57 ret = 1; 61 ret = 1;
@@ -433,6 +437,10 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
433#endif 437#endif
434 438
435/* 439/*
440 * X86_32
441 * Handle a fault on the vmalloc or module mapping area
442 *
443 * X86_64
436 * Handle a fault on the vmalloc area 444 * Handle a fault on the vmalloc area
437 * 445 *
438 * This assumes no large pages in there. 446 * This assumes no large pages in there.
@@ -512,16 +520,20 @@ int show_unhandled_signals = 1;
512 * and the problem, and then passes it off to one of the appropriate 520 * and the problem, and then passes it off to one of the appropriate
513 * routines. 521 * routines.
514 */ 522 */
515asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, 523#ifdef CONFIG_X86_64
516 unsigned long error_code) 524asmlinkage
525#endif
526void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
517{ 527{
518 struct task_struct *tsk; 528 struct task_struct *tsk;
519 struct mm_struct *mm; 529 struct mm_struct *mm;
520 struct vm_area_struct *vma; 530 struct vm_area_struct *vma;
521 unsigned long address; 531 unsigned long address;
522 int write, fault; 532 int write, si_code;
533 int fault;
534#ifdef CONFIG_X86_64
523 unsigned long flags; 535 unsigned long flags;
524 int si_code; 536#endif
525 537
526 /* 538 /*
527 * We can fault from pretty much anywhere, with unknown IRQ state. 539 * We can fault from pretty much anywhere, with unknown IRQ state.
@@ -553,6 +565,30 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
553 * (error_code & 4) == 0, and that the fault was not a 565 * (error_code & 4) == 0, and that the fault was not a
554 * protection error (error_code & 9) == 0. 566 * protection error (error_code & 9) == 0.
555 */ 567 */
568#ifdef CONFIG_X86_32
569 if (unlikely(address >= TASK_SIZE)) {
570 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
571 vmalloc_fault(address) >= 0)
572 return;
573 /*
574 * Don't take the mm semaphore here. If we fixup a prefetch
575 * fault we could otherwise deadlock.
576 */
577 goto bad_area_nosemaphore;
578 }
579
580 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
581 fault has been handled. */
582 if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
583 local_irq_enable();
584
585 /*
586 * If we're in an interrupt, have no user context or are running in an
587 * atomic region then we must not take the fault.
588 */
589 if (in_atomic() || !mm)
590 goto bad_area_nosemaphore;
591#else /* CONFIG_X86_64 */
556 if (unlikely(address >= TASK_SIZE64)) { 592 if (unlikely(address >= TASK_SIZE64)) {
557 /* 593 /*
558 * Don't check for the module range here: its PML4 594 * Don't check for the module range here: its PML4
@@ -570,7 +606,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
570 */ 606 */
571 goto bad_area_nosemaphore; 607 goto bad_area_nosemaphore;
572 } 608 }
573
574 if (likely(regs->flags & X86_EFLAGS_IF)) 609 if (likely(regs->flags & X86_EFLAGS_IF))
575 local_irq_enable(); 610 local_irq_enable();
576 611
@@ -590,8 +625,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
590 */ 625 */
591 if (user_mode_vm(regs)) 626 if (user_mode_vm(regs))
592 error_code |= PF_USER; 627 error_code |= PF_USER;
593 628again:
594 again: 629#endif
595 /* When running in the kernel we expect faults to occur only to 630 /* When running in the kernel we expect faults to occur only to
596 * addresses in user space. All other faults represent errors in the 631 * addresses in user space. All other faults represent errors in the
597 * kernel and should generate an OOPS. Unfortunately, in the case of an 632 * kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -617,7 +652,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
617 vma = find_vma(mm, address); 652 vma = find_vma(mm, address);
618 if (!vma) 653 if (!vma)
619 goto bad_area; 654 goto bad_area;
655#ifdef CONFIG_X86_32
656 if (vma->vm_start <= address)
657#else
620 if (likely(vma->vm_start <= address)) 658 if (likely(vma->vm_start <= address))
659#endif
621 goto good_area; 660 goto good_area;
622 if (!(vma->vm_flags & VM_GROWSDOWN)) 661 if (!(vma->vm_flags & VM_GROWSDOWN))
623 goto bad_area; 662 goto bad_area;
@@ -655,6 +694,9 @@ good_area:
655 goto bad_area; 694 goto bad_area;
656 } 695 }
657 696
697#ifdef CONFIG_X86_32
698survive:
699#endif
658 /* 700 /*
659 * If for any reason at all we couldn't handle the fault, 701 * If for any reason at all we couldn't handle the fault,
660 * make sure we exit gracefully rather than endlessly redo 702 * make sure we exit gracefully rather than endlessly redo
@@ -730,7 +772,6 @@ bad_area_nosemaphore:
730 /* Kernel addresses are always protection faults */ 772 /* Kernel addresses are always protection faults */
731 tsk->thread.error_code = error_code | (address >= TASK_SIZE); 773 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
732 tsk->thread.trap_no = 14; 774 tsk->thread.trap_no = 14;
733
734 force_sig_info_fault(SIGSEGV, si_code, address, tsk); 775 force_sig_info_fault(SIGSEGV, si_code, address, tsk);
735 return; 776 return;
736 } 777 }
@@ -744,9 +785,14 @@ no_context:
744 return; 785 return;
745 786
746 /* 787 /*
788 * X86_32
789 * Valid to do another page fault here, because if this fault
790 * had been triggered by is_prefetch fixup_exception would have
791 * handled it.
792 *
793 * X86_64
747 * Hall of shame of CPU/BIOS bugs. 794 * Hall of shame of CPU/BIOS bugs.
748 */ 795 */
749
750 if (is_prefetch(regs, address, error_code)) 796 if (is_prefetch(regs, address, error_code))
751 return; 797 return;
752 798
@@ -757,7 +803,18 @@ no_context:
757 * Oops. The kernel tried to access some bad page. We'll have to 803 * Oops. The kernel tried to access some bad page. We'll have to
758 * terminate things with extreme prejudice. 804 * terminate things with extreme prejudice.
759 */ 805 */
806#ifdef CONFIG_X86_32
807 bust_spinlocks(1);
808
809 show_fault_oops(regs, error_code, address);
760 810
811 tsk->thread.cr2 = address;
812 tsk->thread.trap_no = 14;
813 tsk->thread.error_code = error_code;
814 die("Oops", regs, error_code);
815 bust_spinlocks(0);
816 do_exit(SIGKILL);
817#else /* CONFIG_X86_64 */
761 flags = oops_begin(); 818 flags = oops_begin();
762 819
763 show_fault_oops(regs, error_code, address); 820 show_fault_oops(regs, error_code, address);
@@ -770,6 +827,7 @@ no_context:
770 /* Executive summary in case the body of the oops scrolled away */ 827 /* Executive summary in case the body of the oops scrolled away */
771 printk(KERN_EMERG "CR2: %016lx\n", address); 828 printk(KERN_EMERG "CR2: %016lx\n", address);
772 oops_end(flags, regs, SIGKILL); 829 oops_end(flags, regs, SIGKILL);
830#endif
773 831
774/* 832/*
775 * We ran out of memory, or some other thing happened to us that made 833 * We ran out of memory, or some other thing happened to us that made
@@ -777,10 +835,18 @@ no_context:
777 */ 835 */
778out_of_memory: 836out_of_memory:
779 up_read(&mm->mmap_sem); 837 up_read(&mm->mmap_sem);
838#ifdef CONFIG_X86_32
839 if (is_global_init(tsk)) {
840 yield();
841 down_read(&mm->mmap_sem);
842 goto survive;
843 }
844#else
780 if (is_global_init(current)) { 845 if (is_global_init(current)) {
781 yield(); 846 yield();
782 goto again; 847 goto again;
783 } 848 }
849#endif
784 printk("VM: killing process %s\n", tsk->comm); 850 printk("VM: killing process %s\n", tsk->comm);
785 if (error_code & PF_USER) 851 if (error_code & PF_USER)
786 do_group_exit(SIGKILL); 852 do_group_exit(SIGKILL);
@@ -792,16 +858,21 @@ do_sigbus:
792 /* Kernel mode? Handle exceptions or die */ 858 /* Kernel mode? Handle exceptions or die */
793 if (!(error_code & PF_USER)) 859 if (!(error_code & PF_USER))
794 goto no_context; 860 goto no_context;
795 861#ifdef CONFIG_X86_32
862 /* User space => ok to do another page fault */
863 if (is_prefetch(regs, address, error_code))
864 return;
865#endif
796 tsk->thread.cr2 = address; 866 tsk->thread.cr2 = address;
797 tsk->thread.error_code = error_code; 867 tsk->thread.error_code = error_code;
798 tsk->thread.trap_no = 14; 868 tsk->thread.trap_no = 14;
799 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); 869 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
800 return;
801} 870}
802 871
872#ifdef CONFIG_X86_64
803DEFINE_SPINLOCK(pgd_lock); 873DEFINE_SPINLOCK(pgd_lock);
804LIST_HEAD(pgd_list); 874LIST_HEAD(pgd_list);
875#endif
805 876
806void vmalloc_sync_all(void) 877void vmalloc_sync_all(void)
807{ 878{