aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peter Zijlstra <peterz@infradead.org> 2013-10-24 06:52:06 -0400
committer Ingo Molnar <mingo@kernel.org> 2013-10-29 07:02:54 -0400
commit e00b12e64be9a34ef071de7b6052ca9ea29dd460 (patch)
tree 2f3395d06d639550039f3c9aa69c4ad0a4854327
parent 2c42cfbfe10872929c2ba1f8130e31063ff59b94 (diff)
perf/x86: Further optimize copy_from_user_nmi()
Now that we can deal with nested NMI due to IRET re-enabling NMIs, and can deal with faults from NMI by making sure we preserve CR2 over NMIs, we can in fact simply access user-space memory from NMI context.

So rewrite copy_from_user_nmi() to use __copy_from_user_inatomic() and rework the fault path to do the minimal required work before taking the in_atomic() fault handler.

In particular, avoid perf_sw_event(), which would make perf recurse on itself (it should be harmless as our recursion protections should be able to deal with this -- but why tempt fate).

Also rename notify_page_fault() to kprobes_fault() as that is a much better name; there is no notifier in it and it's specific to kprobes.

Don measured that his worst case NMI path shrunk from ~300K cycles to ~150K cycles.

Cc: Stephane Eranian <eranian@google.com>
Cc: jmario@redhat.com
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: dave.hansen@linux.intel.com
Tested-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131024105206.GM2490@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- arch/x86/lib/usercopy.c 43
-rw-r--r-- arch/x86/mm/fault.c 41
2 files changed, 36 insertions, 48 deletions
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 4f74d94c8d97..5465b8613944 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -11,39 +11,26 @@
11#include <linux/sched.h> 11#include <linux/sched.h>
12 12
13/* 13/*
14 * best effort, GUP based copy_from_user() that is NMI-safe 14 * We rely on the nested NMI work to allow atomic faults from the NMI path; the
15 * nested NMI paths are careful to preserve CR2.
15 */ 16 */
16unsigned long 17unsigned long
17copy_from_user_nmi(void *to, const void __user *from, unsigned long n) 18copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
18{ 19{
19 unsigned long offset, addr = (unsigned long)from; 20 unsigned long ret;
20 unsigned long size, len = 0;
21 struct page *page;
22 void *map;
23 int ret;
24 21
25 if (__range_not_ok(from, n, TASK_SIZE)) 22 if (__range_not_ok(from, n, TASK_SIZE))
26 return len; 23 return 0;
27 24
28 do { 25 /*
29 ret = __get_user_pages_fast(addr, 1, 0, &page); 26 * Even though this function is typically called from NMI/IRQ context
30 if (!ret) 27 * disable pagefaults so that its behaviour is consistent even when
31 break; 28 * called form other contexts.
32 29 */
33 offset = addr & (PAGE_SIZE - 1); 30 pagefault_disable();
34 size = min(PAGE_SIZE - offset, n - len); 31 ret = __copy_from_user_inatomic(to, from, n);
35 32 pagefault_enable();
36 map = kmap_atomic(page); 33
37 memcpy(to, map+offset, size); 34 return n - ret;
38 kunmap_atomic(map);
39 put_page(page);
40
41 len += size;
42 to += size;
43 addr += size;
44
45 } while (len < n);
46
47 return len;
48} 35}
49EXPORT_SYMBOL_GPL(copy_from_user_nmi); 36EXPORT_SYMBOL_GPL(copy_from_user_nmi);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3aaeffcfd67a..7a517bb41060 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -51,7 +51,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
51 return 0; 51 return 0;
52} 52}
53 53
54static inline int __kprobes notify_page_fault(struct pt_regs *regs) 54static inline int __kprobes kprobes_fault(struct pt_regs *regs)
55{ 55{
56 int ret = 0; 56 int ret = 0;
57 57
@@ -1048,7 +1048,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
1048 return; 1048 return;
1049 1049
1050 /* kprobes don't want to hook the spurious faults: */ 1050 /* kprobes don't want to hook the spurious faults: */
1051 if (notify_page_fault(regs)) 1051 if (kprobes_fault(regs))
1052 return; 1052 return;
1053 /* 1053 /*
1054 * Don't take the mm semaphore here. If we fixup a prefetch 1054 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -1060,23 +1060,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
1060 } 1060 }
1061 1061
1062 /* kprobes don't want to hook the spurious faults: */ 1062 /* kprobes don't want to hook the spurious faults: */
1063 if (unlikely(notify_page_fault(regs))) 1063 if (unlikely(kprobes_fault(regs)))
1064 return; 1064 return;
1065 /*
1066 * It's safe to allow irq's after cr2 has been saved and the
1067 * vmalloc fault has been handled.
1068 *
1069 * User-mode registers count as a user access even for any
1070 * potential system fault or CPU buglet:
1071 */
1072 if (user_mode_vm(regs)) {
1073 local_irq_enable();
1074 error_code |= PF_USER;
1075 flags |= FAULT_FLAG_USER;
1076 } else {
1077 if (regs->flags & X86_EFLAGS_IF)
1078 local_irq_enable();
1079 }
1080 1065
1081 if (unlikely(error_code & PF_RSVD)) 1066 if (unlikely(error_code & PF_RSVD))
1082 pgtable_bad(regs, error_code, address); 1067 pgtable_bad(regs, error_code, address);
@@ -1088,8 +1073,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
1088 } 1073 }
1089 } 1074 }
1090 1075
1091 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
1092
1093 /* 1076 /*
1094 * If we're in an interrupt, have no user context or are running 1077 * If we're in an interrupt, have no user context or are running
1095 * in an atomic region then we must not take the fault: 1078 * in an atomic region then we must not take the fault:
@@ -1099,6 +1082,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
1099 return; 1082 return;
1100 } 1083 }
1101 1084
1085 /*
1086 * It's safe to allow irq's after cr2 has been saved and the
1087 * vmalloc fault has been handled.
1088 *
1089 * User-mode registers count as a user access even for any
1090 * potential system fault or CPU buglet:
1091 */
1092 if (user_mode_vm(regs)) {
1093 local_irq_enable();
1094 error_code |= PF_USER;
1095 flags |= FAULT_FLAG_USER;
1096 } else {
1097 if (regs->flags & X86_EFLAGS_IF)
1098 local_irq_enable();
1099 }
1100
1101 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
1102
1102 if (error_code & PF_WRITE) 1103 if (error_code & PF_WRITE)
1103 flags |= FAULT_FLAG_WRITE; 1104 flags |= FAULT_FLAG_WRITE;
1104 1105