author    | Ingo Molnar <mingo@elte.hu> | 2009-01-21 04:39:51 -0500
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-21 04:39:51 -0500
commit    | 198030782cedf25391e67e7c88b04f87a5eb6563 (patch)
tree      | 5b7368c6bf052bcb4bb273497a57900720d36f51 /arch/x86
parent    | 4ec71fa2d2c3f1040348f2604f4b8ccc833d1c2e (diff)
parent    | 92181f190b649f7ef2b79cbf5c00f26ccc66da2a (diff)
Merge branch 'x86/mm' into core/percpu
Conflicts:
arch/x86/mm/fault.c
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig.cpu               |   4
-rw-r--r-- | arch/x86/include/asm/Kbuild        |   1
-rw-r--r-- | arch/x86/include/asm/byteorder.h   |   1
-rw-r--r-- | arch/x86/kernel/kprobes.c          |   2
-rw-r--r-- | arch/x86/kernel/syscall_table_32.S |   2
-rw-r--r-- | arch/x86/mm/fault.c                | 450
-rw-r--r-- | arch/x86/mm/pageattr.c             |  10
-rw-r--r-- | arch/x86/mm/pat.c                  |  45
-rw-r--r-- | arch/x86/mm/tlb.c                  |  16
9 files changed, 291 insertions, 240 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8078955845ae..cdf4a9623237 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -307,10 +307,10 @@ config X86_CMPXCHG
 
 config X86_L1_CACHE_SHIFT
 	int
-	default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC
+	default "7" if MPENTIUM4 || MPSC
 	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7
+	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
 
 config X86_XADD
 	def_bool y
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index a9f8a814a1f7..4a8e80cdcfa5 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -22,4 +22,3 @@ unifdef-y += unistd_32.h
 unifdef-y += unistd_64.h
 unifdef-y += vm86.h
 unifdef-y += vsyscall.h
-unifdef-y += swab.h
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h
index 7c49917e3d9d..b13a7a88f3eb 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/asm/byteorder.h
@@ -1,7 +1,6 @@
 #ifndef _ASM_X86_BYTEORDER_H
 #define _ASM_X86_BYTEORDER_H
 
-#include <asm/swab.h>
 #include <linux/byteorder/little_endian.h>
 
 #endif /* _ASM_X86_BYTEORDER_H */
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 884d985b8b82..e948b28a5a9a 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -446,7 +446,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
 				       struct kprobe_ctlblk *kcb)
 {
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER)
 	if (p->ainsn.boostable == 1 && !p->post_handler) {
 		/* Boost up -- we can execute copied instructions directly */
 		reset_current_kprobe();
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d44395ff34c3..e2e86a08f31d 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -88,7 +88,7 @@ ENTRY(sys_call_table)
 	.long sys_uselib
 	.long sys_swapon
 	.long sys_reboot
-	.long old_readdir
+	.long sys_old_readdir
 	.long old_mmap		/* 90 */
 	.long sys_munmap
 	.long sys_truncate
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 37242c405f16..65709a6aa6ee 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -92,8 +92,8 @@ static inline int notify_page_fault(struct pt_regs *regs)
  *
  * Opcode checker based on code by Richard Brunner
  */
-static int is_prefetch(struct pt_regs *regs, unsigned long addr,
-		       unsigned long error_code)
+static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
+			unsigned long addr)
 {
 	unsigned char *instr;
 	int scan_more = 1;
@@ -410,15 +410,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 }
 
 #ifdef CONFIG_X86_64
-static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
-				 unsigned long error_code)
+static noinline void pgtable_bad(struct pt_regs *regs,
+			unsigned long error_code, unsigned long address)
 {
 	unsigned long flags = oops_begin();
 	int sig = SIGKILL;
-	struct task_struct *tsk;
+	struct task_struct *tsk = current;
 
 	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
-	       current->comm, address);
+	       tsk->comm, address);
 	dump_pagetable(address);
 	tsk = current;
 	tsk->thread.cr2 = address;
@@ -430,6 +430,196 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 }
 #endif
 
+static noinline void no_context(struct pt_regs *regs,
+			unsigned long error_code, unsigned long address)
+{
+	struct task_struct *tsk = current;
+	unsigned long *stackend;
+
+#ifdef CONFIG_X86_64
+	unsigned long flags;
+	int sig;
+#endif
+
+	/* Are we prepared to handle this kernel fault? */
+	if (fixup_exception(regs))
+		return;
+
+	/*
+	 * X86_32
+	 * Valid to do another page fault here, because if this fault
+	 * had been triggered by is_prefetch fixup_exception would have
+	 * handled it.
+	 *
+	 * X86_64
+	 * Hall of shame of CPU/BIOS bugs.
+	 */
+	if (is_prefetch(regs, error_code, address))
+		return;
+
+	if (is_errata93(regs, address))
+		return;
+
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice.
+	 */
+#ifdef CONFIG_X86_32
+	bust_spinlocks(1);
+#else
+	flags = oops_begin();
+#endif
+
+	show_fault_oops(regs, error_code, address);
+
+	stackend = end_of_stack(tsk);
+	if (*stackend != STACK_END_MAGIC)
+		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+
+	tsk->thread.cr2 = address;
+	tsk->thread.trap_no = 14;
+	tsk->thread.error_code = error_code;
+
+#ifdef CONFIG_X86_32
+	die("Oops", regs, error_code);
+	bust_spinlocks(0);
+	do_exit(SIGKILL);
+#else
+	sig = SIGKILL;
+	if (__die("Oops", regs, error_code))
+		sig = 0;
+	/* Executive summary in case the body of the oops scrolled away */
+	printk(KERN_EMERG "CR2: %016lx\n", address);
+	oops_end(flags, regs, sig);
+#endif
+}
+
+static void __bad_area_nosemaphore(struct pt_regs *regs,
+			unsigned long error_code, unsigned long address,
+			int si_code)
+{
+	struct task_struct *tsk = current;
+
+	/* User mode accesses just cause a SIGSEGV */
+	if (error_code & PF_USER) {
+		/*
+		 * It's possible to have interrupts off here.
+		 */
+		local_irq_enable();
+
+		/*
+		 * Valid to do another page fault here because this one came
+		 * from user space.
+		 */
+		if (is_prefetch(regs, error_code, address))
+			return;
+
+		if (is_errata100(regs, address))
+			return;
+
+		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+		    printk_ratelimit()) {
+			printk(
+			"%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+			tsk->comm, task_pid_nr(tsk), address,
+			(void *) regs->ip, (void *) regs->sp, error_code);
+			print_vma_addr(" in ", regs->ip);
+			printk("\n");
+		}
+
+		tsk->thread.cr2 = address;
+		/* Kernel addresses are always protection faults */
+		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+		tsk->thread.trap_no = 14;
+		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+		return;
+	}
+
+	if (is_f00f_bug(regs, address))
+		return;
+
+	no_context(regs, error_code, address);
+}
+
+static noinline void bad_area_nosemaphore(struct pt_regs *regs,
+			unsigned long error_code, unsigned long address)
+{
+	__bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
+}
+
+static void __bad_area(struct pt_regs *regs,
+			unsigned long error_code, unsigned long address,
+			int si_code)
+{
+	struct mm_struct *mm = current->mm;
+
+	/*
+	 * Something tried to access memory that isn't in our memory map..
+	 * Fix it, but check if it's kernel or user first..
+	 */
+	up_read(&mm->mmap_sem);
+
+	__bad_area_nosemaphore(regs, error_code, address, si_code);
+}
+
+static noinline void bad_area(struct pt_regs *regs,
+			unsigned long error_code, unsigned long address)
+{
+	__bad_area(regs, error_code, address, SEGV_MAPERR);
+}
+
+static noinline void bad_area_access_error(struct pt_regs *regs,
+			unsigned long error_code, unsigned long address)
+{
+	__bad_area(regs, error_code, address, SEGV_ACCERR);
+}
+
+/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
+static void out_of_memory(struct pt_regs *regs,
+			unsigned long error_code, unsigned long address)
+{
+	/*
+	 * We ran out of memory, call the OOM killer, and return the userspace
+	 * (which will retry the fault, or kill us if we got oom-killed).
+	 */
+	up_read(&current->mm->mmap_sem);
+	pagefault_out_of_memory();
+}
+
+static void do_sigbus(struct pt_regs *regs,
+			unsigned long error_code, unsigned long address)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+
+	up_read(&mm->mmap_sem);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!(error_code & PF_USER))
+		no_context(regs, error_code, address);
+#ifdef CONFIG_X86_32
+	/* User space => ok to do another page fault */
+	if (is_prefetch(regs, error_code, address))
+		return;
+#endif
+	tsk->thread.cr2 = address;
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = 14;
+	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+}
+
+static noinline void mm_fault_error(struct pt_regs *regs,
+		unsigned long error_code, unsigned long address, unsigned int fault)
+{
+	if (fault & VM_FAULT_OOM)
+		out_of_memory(regs, error_code, address);
+	else if (fault & VM_FAULT_SIGBUS)
+		do_sigbus(regs, error_code, address);
+	else
+		BUG();
+}
+
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
 {
 	if ((error_code & PF_WRITE) && !pte_write(*pte))
@@ -449,8 +639,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
  */
-static int spurious_fault(unsigned long address,
-			  unsigned long error_code)
+static noinline int spurious_fault(unsigned long error_code,
+				unsigned long address)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -495,7 +685,7 @@ static int spurious_fault(unsigned long address,
  *
  * This assumes no large pages in there.
  */
-static int vmalloc_fault(unsigned long address)
+static noinline int vmalloc_fault(unsigned long address)
 {
 #ifdef CONFIG_X86_32
 	unsigned long pgd_paddr;
@@ -574,6 +764,25 @@ static int vmalloc_fault(unsigned long address)
 
 int show_unhandled_signals = 1;
 
+static inline int access_error(unsigned long error_code, int write,
+				struct vm_area_struct *vma)
+{
+	if (write) {
+		/* write, present and write, not present */
+		if (unlikely(!(vma->vm_flags & VM_WRITE)))
+			return 1;
+	} else if (unlikely(error_code & PF_PROT)) {
+		/* read, present */
+		return 1;
+	} else {
+		/* read, not present */
+		if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * This routine handles page faults. It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -584,18 +793,12 @@ asmlinkage
 #endif
 void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
+	unsigned long address;
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	unsigned long address;
-	int write, si_code;
+	int write;
 	int fault;
-	unsigned long *stackend;
-
-#ifdef CONFIG_X86_64
-	unsigned long flags;
-	int sig;
-#endif
 
 	tsk = current;
 	mm = tsk->mm;
@@ -604,9 +807,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	/* get the address */
 	address = read_cr2();
 
-	si_code = SEGV_MAPERR;
-
-	if (notify_page_fault(regs))
+	if (unlikely(notify_page_fault(regs)))
 		return;
 	if (unlikely(kmmio_fault(regs, address)))
 		return;
@@ -634,17 +835,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 			return;
 
 		/* Can handle a stale RO->RW TLB */
-		if (spurious_fault(address, error_code))
+		if (spurious_fault(error_code, address))
 			return;
 
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
 		 */
-		goto bad_area_nosemaphore;
+		bad_area_nosemaphore(regs, error_code, address);
+		return;
 	}
 
-
 	/*
 	 * It's safe to allow irq's after cr2 has been saved and the
 	 * vmalloc fault has been handled.
@@ -660,15 +861,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 #ifdef CONFIG_X86_64
 	if (unlikely(error_code & PF_RSVD))
-		pgtable_bad(address, regs, error_code);
+		pgtable_bad(regs, error_code, address);
 #endif
 
 	/*
 	 * If we're in an interrupt, have no user context or are running in an
 	 * atomic region then we must not take the fault.
 	 */
-	if (unlikely(in_atomic() || !mm))
-		goto bad_area_nosemaphore;
+	if (unlikely(in_atomic() || !mm)) {
+		bad_area_nosemaphore(regs, error_code, address);
+		return;
+	}
 
 	/*
 	 * When running in the kernel we expect faults to occur only to
@@ -686,20 +889,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 * source. If this is invalid we can skip the address space check,
 	 * thus avoiding the deadlock.
 	 */
-	if (!down_read_trylock(&mm->mmap_sem)) {
+	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
 		if ((error_code & PF_USER) == 0 &&
-		    !search_exception_tables(regs->ip))
-			goto bad_area_nosemaphore;
+		    !search_exception_tables(regs->ip)) {
+			bad_area_nosemaphore(regs, error_code, address);
+			return;
+		}
 		down_read(&mm->mmap_sem);
 	}
 
 	vma = find_vma(mm, address);
-	if (!vma)
-		goto bad_area;
-	if (vma->vm_start <= address)
+	if (unlikely(!vma)) {
+		bad_area(regs, error_code, address);
+		return;
+	}
+	if (likely(vma->vm_start <= address))
 		goto good_area;
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto bad_area;
+	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+		bad_area(regs, error_code, address);
+		return;
+	}
 	if (error_code & PF_USER) {
 		/*
 		 * Accessing the stack below %sp is always a bug.
@@ -707,31 +916,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		 * and pusha to work. ("enter $65535,$31" pushes
 		 * 32 pointers and then decrements %sp by 65535.)
 		 */
-		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
-			goto bad_area;
+		if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
+			bad_area(regs, error_code, address);
+			return;
+		}
 	}
-	if (expand_stack(vma, address))
-		goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
+	if (unlikely(expand_stack(vma, address))) {
+		bad_area(regs, error_code, address);
+		return;
+	}
+
+	/*
+	 * Ok, we have a good vm_area for this memory access, so
+	 * we can handle it..
+	 */
 good_area:
-	si_code = SEGV_ACCERR;
-	write = 0;
-	switch (error_code & (PF_PROT|PF_WRITE)) {
-	default:	/* 3: write, present */
-		/* fall through */
-	case PF_WRITE:		/* write, not present */
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-		write++;
-		break;
-	case PF_PROT:		/* read, present */
-		goto bad_area;
-	case 0:			/* read, not present */
-		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-			goto bad_area;
+	write = error_code & PF_WRITE;
+	if (unlikely(access_error(error_code, write, vma))) {
+		bad_area_access_error(regs, error_code, address);
+		return;
 	}
 
 	/*
@@ -741,11 +944,8 @@ good_area:
 	 */
 	fault = handle_mm_fault(mm, vma, address, write);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
-		if (fault & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (fault & VM_FAULT_SIGBUS)
-			goto do_sigbus;
-		BUG();
+		mm_fault_error(regs, error_code, address, fault);
+		return;
 	}
 	if (fault & VM_FAULT_MAJOR)
 		tsk->maj_flt++;
@@ -763,132 +963,6 @@ good_area:
 	}
 #endif
 	up_read(&mm->mmap_sem);
-	return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
-	up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
-	/* User mode accesses just cause a SIGSEGV */
-	if (error_code & PF_USER) {
-		/*
-		 * It's possible to have interrupts off here.
-		 */
-		local_irq_enable();
-
-		/*
-		 * Valid to do another page fault here because this one came
-		 * from user space.
-		 */
-		if (is_prefetch(regs, address, error_code))
-			return;
-
-		if (is_errata100(regs, address))
-			return;
-
-		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-		    printk_ratelimit()) {
-			printk(
-			"%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
-			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
-			tsk->comm, task_pid_nr(tsk), address,
-			(void *) regs->ip, (void *) regs->sp, error_code);
-			print_vma_addr(" in ", regs->ip);
-			printk("\n");
-		}
-
-		tsk->thread.cr2 = address;
-		/* Kernel addresses are always protection faults */
-		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
-		tsk->thread.trap_no = 14;
-		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
-		return;
-	}
-
-	if (is_f00f_bug(regs, address))
-		return;
-
-no_context:
-	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs))
-		return;
-
-	/*
-	 * X86_32
-	 * Valid to do another page fault here, because if this fault
-	 * had been triggered by is_prefetch fixup_exception would have
-	 * handled it.
-	 *
-	 * X86_64
-	 * Hall of shame of CPU/BIOS bugs.
-	 */
-	if (is_prefetch(regs, address, error_code))
-		return;
-
-	if (is_errata93(regs, address))
-		return;
-
-	/*
-	 * Oops. The kernel tried to access some bad page. We'll have to
-	 * terminate things with extreme prejudice.
-	 */
-#ifdef CONFIG_X86_32
-	bust_spinlocks(1);
-#else
-	flags = oops_begin();
-#endif
-
-	show_fault_oops(regs, error_code, address);
-
-	stackend = end_of_stack(tsk);
-	if (*stackend != STACK_END_MAGIC)
-		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
-
-	tsk->thread.cr2 = address;
-	tsk->thread.trap_no = 14;
-	tsk->thread.error_code = error_code;
-
-#ifdef CONFIG_X86_32
-	die("Oops", regs, error_code);
-	bust_spinlocks(0);
-	do_exit(SIGKILL);
-#else
-	sig = SIGKILL;
-	if (__die("Oops", regs, error_code))
-		sig = 0;
-	/* Executive summary in case the body of the oops scrolled away */
-	printk(KERN_EMERG "CR2: %016lx\n", address);
-	oops_end(flags, regs, sig);
-#endif
-
-out_of_memory:
-	/*
-	 * We ran out of memory, call the OOM killer, and return the userspace
-	 * (which will retry the fault, or kill us if we got oom-killed).
-	 */
-	up_read(&mm->mmap_sem);
-	pagefault_out_of_memory();
-	return;
-
-do_sigbus:
-	up_read(&mm->mmap_sem);
-
-	/* Kernel mode? Handle exceptions or die */
-	if (!(error_code & PF_USER))
-		goto no_context;
-#ifdef CONFIG_X86_32
-	/* User space => ok to do another page fault */
-	if (is_prefetch(regs, address, error_code))
-		return;
-#endif
-	tsk->thread.cr2 = address;
-	tsk->thread.error_code = error_code;
-	tsk->thread.trap_no = 14;
-	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
 }
 
 DEFINE_SPINLOCK(pgd_lock);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4cf30dee8161..e89d24815f26 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -555,12 +555,10 @@ repeat:
 	if (!pte_val(old_pte)) {
 		if (!primary)
 			return 0;
-
-		/*
-		 * Special error value returned, indicating that the mapping
-		 * did not exist at this address.
-		 */
-		return -EFAULT;
+		WARN(1, KERN_WARNING "CPA: called for zero pte. "
+		       "vaddr = %lx cpa->vaddr = %lx\n", address,
+		       *cpa->vaddr);
+		return -EINVAL;
 	}
 
 	if (level == PG_LEVEL_4K) {
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 3be399013de6..c9488513fd70 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -522,35 +522,6 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 }
 #endif /* CONFIG_STRICT_DEVMEM */
 
-/*
- * Change the memory type for the physial address range in kernel identity
- * mapping space if that range is a part of identity map.
- */
-static int kernel_map_sync_memtype(u64 base, unsigned long size,
-				unsigned long flags)
-{
-	unsigned long id_sz;
-	int ret;
-
-	if (!pat_enabled || base >= __pa(high_memory))
-		return 0;
-
-	id_sz = (__pa(high_memory) < base + size) ?
-				__pa(high_memory) - base :
-				size;
-
-	ret = ioremap_change_attr((unsigned long)__va(base), id_sz, flags);
-	/*
-	 * -EFAULT return means that the addr was not valid and did not have
-	 * any identity mapping. That case is a success for
-	 * kernel_map_sync_memtype.
-	 */
-	if (ret == -EFAULT)
-		ret = 0;
-
-	return ret;
-}
-
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t *vma_prot)
 {
@@ -601,7 +572,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 	if (retval < 0)
 		return 0;
 
-	if (kernel_map_sync_memtype(offset, size, flags)) {
+	if (((pfn < max_low_pfn_mapped) ||
+	     (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
+	    ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
 		free_memtype(offset, offset + size);
 		printk(KERN_INFO
 		"%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
@@ -649,7 +622,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
 				int strict_prot)
 {
 	int is_ram = 0;
-	int ret;
+	int id_sz, ret;
 	unsigned long flags;
 	unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
 
@@ -690,7 +663,15 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
 						flags);
 	}
 
-	if (kernel_map_sync_memtype(paddr, size, flags)) {
+	/* Need to keep identity mapping in sync */
+	if (paddr >= __pa(high_memory))
+		return 0;
+
+	id_sz = (__pa(high_memory) < paddr + size) ?
+				__pa(high_memory) - paddr :
+				size;
+
+	if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
 		free_memtype(paddr, paddr + size);
 		printk(KERN_ERR
 		"%s:%d reserve_pfn_range ioremap_change_attr failed %s "
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index b3ca1b940654..72a6d4ebe34d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -29,7 +29,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
  *	To avoid global state use 8 different call vectors.
  *	Each CPU uses a specific vector to trigger flushes on other
  *	CPUs. Depending on the received vector the target CPUs look into
- *	the right per cpu variable for the flush data.
+ *	the right array slot for the flush data.
  *
  *	With more than 8 CPUs they are hashed to the 8 available
  *	vectors. The limited global vector space forces us to this right now.
@@ -44,13 +44,13 @@ union smp_flush_state {
 		spinlock_t tlbstate_lock;
 		DECLARE_BITMAP(flush_cpumask, NR_CPUS);
 	};
-	char pad[SMP_CACHE_BYTES];
-} ____cacheline_aligned;
+	char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
+} ____cacheline_internodealigned_in_smp;
 
 /* State is put into the per CPU data section, but padded
    to a full cache line because other CPUs can access it and we don't
    want false sharing in the per cpu data segment. */
-static DEFINE_PER_CPU(union smp_flush_state, flush_state);
+static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
 
 /*
  * We cannot call mmdrop() because we are in interrupt context,
@@ -135,7 +135,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 	 * Use that to determine where the sender put the data.
 	 */
 	sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
-	f = &per_cpu(flush_state, sender);
+	f = &flush_state[sender];
 
 	if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
 		goto out;
@@ -173,7 +173,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 
 	/* Caller has disabled preemption */
 	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
-	f = &per_cpu(flush_state, sender);
+	f = &flush_state[sender];
 
 	/*
 	 * Could avoid this lock when
@@ -227,8 +227,8 @@ static int __cpuinit init_smp_flush(void)
 {
 	int i;
 
-	for_each_possible_cpu(i)
-		spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
+	for (i = 0; i < ARRAY_SIZE(flush_state); i++)
+		spin_lock_init(&flush_state[i].tlbstate_lock);
 
 	return 0;
 }