author     Linus Torvalds <torvalds@linux-foundation.org>   2008-10-12 16:16:12 -0400
committer  Ingo Molnar <mingo@elte.hu>                      2008-10-13 11:46:39 -0400
commit     891cffbd6bcba26409869c19c07ecd4bfc0c2460
tree       cddf5286aedb76eecffd976101273e66858c4a23  /arch/x86/mm/fault.c
parent     4480f15b3306f43bbb0310d461142b4e897ca45b
x86/mm: do not trigger a kernel warning if user-space disables interrupts and generates a page fault
Arjan reported a spike in the following bug pattern in v2.6.27:
http://www.kerneloops.org/searchweek.php?search=lock_page
which happens because hwclock started triggering warnings due to
a (correct) might_sleep() check in the MM code.
The warning occurs because hwclock uses this dubious sequence to run
code "atomically":
static unsigned long
atomic(const char *name, unsigned long (*op)(unsigned long),
       unsigned long arg)
{
	unsigned long v;
	__asm__ volatile ("cli");
	v = (*op)(arg);
	__asm__ volatile ("sti");
	return v;
}
Then it pagefaults in that "atomic" section, triggering the warning.
There is no way the kernel could provide "atomicity" in this path:
a page fault is a cannot-continue machine event, so the kernel has to
wait for the page to be filled in.
Even if it were just a minor fault, we would have to take locks and might
have to spend quite a bit of time with interrupts disabled - not nice for
irq latencies in general.
So instead just enable interrupts in the pagefault path unconditionally
if we come from user-space, and handle the fault.
Also, while touching this code, unify some trivial parts of the x86
VM paths.
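For illustration, the reworked entry logic boils down to: a fault coming
from user mode always re-enables interrupts and is tagged PF_USER, while a
kernel-mode fault re-enables them only if the interrupted context had them
on. The stand-alone C sketch below only models that decision; the numeric
flag values and the user flag standing in for user_mode_vm(regs) are stubs
for illustration, and the authoritative change is the fault.c diff further
down.

/* Stand-alone model of the decision, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define X86_EFLAGS_IF 0x00000200	/* EFLAGS interrupt-enable bit */
#define PF_USER       (1 << 2)		/* error-code bit: fault from user mode */

struct fault_ctx {
	unsigned long flags;		/* saved EFLAGS of the faulting context */
	bool user;			/* stand-in for user_mode_vm(regs) */
};

/* Mirror of the reworked do_page_fault() entry: decide whether irqs
   come back on and whether the fault counts as a user access. */
static void classify_fault(const struct fault_ctx *ctx,
			   unsigned long *error_code, bool *enable_irqs)
{
	if (ctx->user) {
		/* User-space fault: always allow irqs, even after "cli". */
		*enable_irqs = true;
		*error_code |= PF_USER;
	} else if (ctx->flags & X86_EFLAGS_IF) {
		/* Kernel fault: only restore irqs if they were enabled. */
		*enable_irqs = true;
	}
}

int main(void)
{
	/* hwclock's case: user mode, interrupts disabled via "cli". */
	struct fault_ctx hwclock = { .flags = 0, .user = true };
	unsigned long error_code = 0;
	bool enable_irqs = false;

	classify_fault(&hwclock, &error_code, &enable_irqs);
	printf("enable_irqs=%d user_fault=%d\n",
	       enable_irqs, !!(error_code & PF_USER));
	return 0;
}

Folding the user-mode check and the PF_USER tagging into one place is what
lets the separate CONFIG_X86_32 and CONFIG_X86_64 branches collapse into
the single path seen in the diff.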
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm/fault.c')
 arch/x86/mm/fault.c | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a742d753d5b0..ac2ad781da00 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -645,24 +645,23 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	}
 
 
-#ifdef CONFIG_X86_32
-	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
-	   fault has been handled. */
-	if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
-		local_irq_enable();
-
 	/*
-	 * If we're in an interrupt, have no user context or are running in an
-	 * atomic region then we must not take the fault.
+	 * It's safe to allow irq's after cr2 has been saved and the
+	 * vmalloc fault has been handled.
+	 *
+	 * User-mode registers count as a user access even for any
+	 * potential system fault or CPU buglet.
 	 */
-	if (in_atomic() || !mm)
-		goto bad_area_nosemaphore;
-#else /* CONFIG_X86_64 */
-	if (likely(regs->flags & X86_EFLAGS_IF))
+	if (user_mode_vm(regs)) {
+		local_irq_enable();
+		error_code |= PF_USER;
+	} else if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 
+#ifdef CONFIG_X86_64
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(address, regs, error_code);
+#endif
 
 	/*
 	 * If we're in an interrupt, have no user context or are running in an
@@ -671,14 +670,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
-	/*
-	 * User-mode registers count as a user access even for any
-	 * potential system fault or CPU buglet.
-	 */
-	if (user_mode_vm(regs))
-		error_code |= PF_USER;
 again:
-#endif
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space. All other faults represent errors in the
 	 * kernel and should generate an OOPS. Unfortunately, in the case of an