aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/fault.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-10-12 16:16:12 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-10-13 11:46:39 -0400
commit 891cffbd6bcba26409869c19c07ecd4bfc0c2460 (patch)
tree cddf5286aedb76eecffd976101273e66858c4a23 /arch/x86/mm/fault.c
parent 4480f15b3306f43bbb0310d461142b4e897ca45b (diff)
x86/mm: do not trigger a kernel warning if user-space disables interrupts and generates a page fault
Arjan reported a spike in the following bug pattern in v2.6.27:

    http://www.kerneloops.org/searchweek.php?search=lock_page

which happens because hwclock started triggering warnings due to
a (correct) might_sleep() check in the MM code.

The warning occurs because hwclock uses this dubious sequence of
code to run "atomic" code:

    static unsigned long
    atomic(const char *name, unsigned long (*op)(unsigned long),
           unsigned long arg)
    {
        unsigned long v;
        __asm__ volatile ("cli");
        v = (*op)(arg);
        __asm__ volatile ("sti");
        return v;
    }

Then it pagefaults in that "atomic" section, triggering the warning.

There is no way the kernel could provide "atomicity" in this path,
a page fault is a cannot-continue machine event so the kernel has to
wait for the page to be filled in.

Even if it was just a minor fault we'd have to take locks and might have
to spend quite a bit of time with interrupts disabled - not nice to irq
latencies in general.

So instead just enable interrupts in the pagefault path unconditionally
if we come from user-space, and handle the fault.

Also, while touching this code, unify some trivial parts of the x86
VM paths at the same time.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r-- arch/x86/mm/fault.c 30
1 files changed, 11 insertions, 19 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a742d753d5b..ac2ad781da0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -645,24 +645,23 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
645 } 645 }
646 646
647 647
648#ifdef CONFIG_X86_32
649 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
650 fault has been handled. */
651 if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
652 local_irq_enable();
653
654 /* 648 /*
655 * If we're in an interrupt, have no user context or are running in an 649 * It's safe to allow irq's after cr2 has been saved and the
656 * atomic region then we must not take the fault. 650 * vmalloc fault has been handled.
651 *
652 * User-mode registers count as a user access even for any
653 * potential system fault or CPU buglet.
657 */ 654 */
658 if (in_atomic() || !mm) 655 if (user_mode_vm(regs)) {
659 goto bad_area_nosemaphore; 656 local_irq_enable();
660#else /* CONFIG_X86_64 */ 657 error_code |= PF_USER;
661 if (likely(regs->flags & X86_EFLAGS_IF)) 658 } else if (regs->flags & X86_EFLAGS_IF)
662 local_irq_enable(); 659 local_irq_enable();
663 660
661#ifdef CONFIG_X86_64
664 if (unlikely(error_code & PF_RSVD)) 662 if (unlikely(error_code & PF_RSVD))
665 pgtable_bad(address, regs, error_code); 663 pgtable_bad(address, regs, error_code);
664#endif
666 665
667 /* 666 /*
668 * If we're in an interrupt, have no user context or are running in an 667 * If we're in an interrupt, have no user context or are running in an
@@ -671,14 +670,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
671 if (unlikely(in_atomic() || !mm)) 670 if (unlikely(in_atomic() || !mm))
672 goto bad_area_nosemaphore; 671 goto bad_area_nosemaphore;
673 672
674 /*
675 * User-mode registers count as a user access even for any
676 * potential system fault or CPU buglet.
677 */
678 if (user_mode_vm(regs))
679 error_code |= PF_USER;
680again: 673again:
681#endif
682 /* When running in the kernel we expect faults to occur only to 674 /* When running in the kernel we expect faults to occur only to
683 * addresses in user space. All other faults represent errors in the 675 * addresses in user space. All other faults represent errors in the
684 * kernel and should generate an OOPS. Unfortunately, in the case of an 676 * kernel and should generate an OOPS. Unfortunately, in the case of an