diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-06-15 07:07:24 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-06-15 09:57:53 -0400 |
commit | 74193ef0ecab92535c8517f082f1f50504526c9b (patch) | |
tree | 532a763dabbd198c514707077aae4df4bb7de15e | |
parent | 3ff0141aa3a03ca3388b40b36167d0a37919f3fd (diff) |
perf_counter: x86: Fix call-chain support to use NMI-safe methods
__copy_from_user_inatomic() isn't NMI safe: it can trigger
the page fault handler, which is another trap, and its return path
invokes IRET, which will also close the NMI context.
Therefore use a GUP based approach to copy the stack frames over.
We tried an alternative solution as well: we used a forward ported
version of Mathieu Desnoyers's "NMI safe INT3 and Page Fault" patch
that modifies the exception return path to use an open-coded IRET with
explicit stack unrolling and TF checking.
This didn't work as it interacted with faulting user-space instructions,
causing them not to restart properly, which corrupts user-space
registers.
Solving that would probably involve disassembling those instructions
and backtracing the RIP. But even without that, the code was deemed
too complex an addition to the already non-trivial x86 entry assembly code,
so instead we went for this GUP based method that does a
software-walk of the pagetables.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 49 |
1 files changed, 39 insertions, 10 deletions
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 6d5e7cfd97e7..e8c68a5091df 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/kdebug.h> | 19 | #include <linux/kdebug.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/highmem.h> | ||
22 | 23 | ||
23 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
24 | #include <asm/stacktrace.h> | 25 | #include <asm/stacktrace.h> |
@@ -1617,20 +1618,48 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1617 | entry->kernel = entry->nr - nr; | 1618 | entry->kernel = entry->nr - nr; |
1618 | } | 1619 | } |
1619 | 1620 | ||
1620 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 1621 | /* |
1622 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | ||
1623 | */ | ||
1624 | static unsigned long | ||
1625 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
1621 | { | 1626 | { |
1627 | unsigned long offset, addr = (unsigned long)from; | ||
1628 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
1629 | unsigned long size, len = 0; | ||
1630 | struct page *page; | ||
1631 | void *map; | ||
1622 | int ret; | 1632 | int ret; |
1623 | 1633 | ||
1624 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | 1634 | do { |
1625 | return 0; | 1635 | ret = __get_user_pages_fast(addr, 1, 0, &page); |
1636 | if (!ret) | ||
1637 | break; | ||
1626 | 1638 | ||
1627 | ret = 1; | 1639 | offset = addr & (PAGE_SIZE - 1); |
1628 | pagefault_disable(); | 1640 | size = min(PAGE_SIZE - offset, n - len); |
1629 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
1630 | ret = 0; | ||
1631 | pagefault_enable(); | ||
1632 | 1641 | ||
1633 | return ret; | 1642 | map = kmap_atomic(page, type); |
1643 | memcpy(to, map+offset, size); | ||
1644 | kunmap_atomic(map, type); | ||
1645 | put_page(page); | ||
1646 | |||
1647 | len += size; | ||
1648 | to += size; | ||
1649 | addr += size; | ||
1650 | |||
1651 | } while (len < n); | ||
1652 | |||
1653 | return len; | ||
1654 | } | ||
1655 | |||
1656 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
1657 | { | ||
1658 | unsigned long bytes; | ||
1659 | |||
1660 | bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); | ||
1661 | |||
1662 | return bytes == sizeof(*frame); | ||
1634 | } | 1663 | } |
1635 | 1664 | ||
1636 | static void | 1665 | static void |
@@ -1643,7 +1672,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1643 | if (!user_mode(regs)) | 1672 | if (!user_mode(regs)) |
1644 | regs = task_pt_regs(current); | 1673 | regs = task_pt_regs(current); |
1645 | 1674 | ||
1646 | fp = (void __user *)regs->bp; | 1675 | fp = (void __user *)regs->bp; |
1647 | 1676 | ||
1648 | callchain_store(entry, regs->ip); | 1677 | callchain_store(entry, regs->ip); |
1649 | 1678 | ||