author     Peter Zijlstra <peterz@infradead.org>    2013-10-24 06:52:06 -0400
committer  Ingo Molnar <mingo@kernel.org>           2013-10-29 07:02:54 -0400
commit     e00b12e64be9a34ef071de7b6052ca9ea29dd460 (patch)
tree       2f3395d06d639550039f3c9aa69c4ad0a4854327
parent     2c42cfbfe10872929c2ba1f8130e31063ff59b94 (diff)
perf/x86: Further optimize copy_from_user_nmi()
Now that we can deal with nested NMIs (due to IRET re-enabling NMIs) and
with faults from NMI context (by making sure we preserve CR2 across
NMIs), we can in fact simply access user-space memory from NMI context.
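Concretely, that CR2 preservation brackets the NMI body with a
save/restore. A minimal sketch, assuming the 3.12-era do_nmi() from
arch/x86/kernel/nmi.c (condensed, nesting bookkeeping omitted; the exact
placement of the save/restore is illustrative):

dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
	/*
	 * Save CR2: a page fault taken inside this NMI would otherwise
	 * clobber the faulting address of a #PF we may have interrupted.
	 */
	unsigned long cr2 = read_cr2();

	nmi_enter();
	inc_irq_stat(__nmi_count);
	if (!ignore_nmis)
		default_do_nmi(regs);	/* may fault and rewrite CR2 */
	nmi_exit();

	/* Restore CR2 only if a nested fault changed it. */
	if (unlikely(cr2 != read_cr2()))
		write_cr2(cr2);
}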
So rewrite copy_from_user_nmi() to use __copy_from_user_inatomic() and
rework the fault path to do the minimal required work before reaching
the in_atomic() check in the fault handler.
In particular, avoid perf_sw_event(), which would make perf recurse on
itself (it should be harmless, as our recursion protections should be
able to deal with this -- but why tempt fate).
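The core of the rewrite is just this pattern; a condensed sketch of the
usercopy.c hunk below, with an extra comment (mine) on why the fault
stays atomic:

	pagefault_disable();	/* bumps the preempt count, so in_atomic()
				 * is true: a fault inside the copy takes
				 * the exception-fixup path instead of
				 * sleeping on mmap_sem */
	ret = __copy_from_user_inatomic(to, from, n);	/* bytes NOT copied */
	pagefault_enable();

	return n - ret;		/* callers get bytes copied */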
Also rename notify_page_fault() to kprobes_fault(), as that is a much
better name; there is no notifier in it and it's specific to kprobes.
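For reference, the helper's body (unchanged by the rename; reproduced
from arch/x86/mm/fault.c of this era, so treat details as illustrative)
really is notifier-free and kprobe-specific:

static inline int __kprobes kprobes_fault(struct pt_regs *regs)
{
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (kprobes_built_in() && !user_mode_vm(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}

	return ret;
}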
Don measured that his worst-case NMI path shrank from ~300K cycles to
~150K cycles.
Cc: Stephane Eranian <eranian@google.com>
Cc: jmario@redhat.com
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: dave.hansen@linux.intel.com
Tested-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131024105206.GM2490@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/lib/usercopy.c | 43
arch/x86/mm/fault.c     | 41
2 files changed, 36 insertions(+), 48 deletions(-)
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 4f74d94c8d97..5465b8613944 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -11,39 +11,26 @@
 #include <linux/sched.h>
 
 /*
- * best effort, GUP based copy_from_user() that is NMI-safe
+ * We rely on the nested NMI work to allow atomic faults from the NMI path; the
+ * nested NMI paths are careful to preserve CR2.
  */
 unsigned long
 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 {
-	unsigned long offset, addr = (unsigned long)from;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
+	unsigned long ret;
 
 	if (__range_not_ok(from, n, TASK_SIZE))
-		return len;
+		return 0;
 
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map);
-		put_page(page);
-
-		len += size;
-		to += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
-}
+	/*
+	 * Even though this function is typically called from NMI/IRQ context
+	 * disable pagefaults so that its behaviour is consistent even when
+	 * called form other contexts.
+	 */
+	pagefault_disable();
+	ret = __copy_from_user_inatomic(to, from, n);
+	pagefault_enable();
+
+	return n - ret;
+}
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3aaeffcfd67a..7a517bb41060 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -51,7 +51,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
 	return 0;
 }
 
-static inline int __kprobes notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes kprobes_fault(struct pt_regs *regs)
 {
 	int ret = 0;
 
@@ -1048,7 +1048,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 			return;
 
 		/* kprobes don't want to hook the spurious faults: */
-		if (notify_page_fault(regs))
+		if (kprobes_fault(regs))
 			return;
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -1060,23 +1060,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	}
 
 	/* kprobes don't want to hook the spurious faults: */
-	if (unlikely(notify_page_fault(regs)))
+	if (unlikely(kprobes_fault(regs)))
 		return;
-	/*
-	 * It's safe to allow irq's after cr2 has been saved and the
-	 * vmalloc fault has been handled.
-	 *
-	 * User-mode registers count as a user access even for any
-	 * potential system fault or CPU buglet:
-	 */
-	if (user_mode_vm(regs)) {
-		local_irq_enable();
-		error_code |= PF_USER;
-		flags |= FAULT_FLAG_USER;
-	} else {
-		if (regs->flags & X86_EFLAGS_IF)
-			local_irq_enable();
-	}
 
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
@@ -1088,8 +1073,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		}
 	}
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
 	/*
 	 * If we're in an interrupt, have no user context or are running
 	 * in an atomic region then we must not take the fault:
@@ -1099,6 +1082,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		return;
 	}
 
+	/*
+	 * It's safe to allow irq's after cr2 has been saved and the
+	 * vmalloc fault has been handled.
+	 *
+	 * User-mode registers count as a user access even for any
+	 * potential system fault or CPU buglet:
+	 */
+	if (user_mode_vm(regs)) {
+		local_irq_enable();
+		error_code |= PF_USER;
+		flags |= FAULT_FLAG_USER;
+	} else {
+		if (regs->flags & X86_EFLAGS_IF)
+			local_irq_enable();
+	}
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 	if (error_code & PF_WRITE)
 		flags |= FAULT_FLAG_WRITE;
 
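For context on Don's numbers, the hot path is perf's user-stack
unwinder, which performs one copy_from_user_nmi() per stack frame from
NMI context. A condensed sketch modelled on perf_callchain_user() in
arch/x86/kernel/cpu/perf_event.c of this era (the function name below is
hypothetical and the loop is simplified):

#include <linux/perf_event.h>
#include <linux/uaccess.h>

/* Frame-pointer chain as laid out in the interrupted user task. */
struct stack_frame {
	struct stack_frame __user *next_frame;
	unsigned long return_address;
};

static void unwind_user_callchain(struct perf_callchain_entry *entry,
				  struct stack_frame __user *fp)
{
	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		struct stack_frame frame;
		unsigned long bytes;

		frame.next_frame = NULL;
		frame.return_address = 0;

		/*
		 * One NMI-safe user copy per frame; after this patch a
		 * fault costs an exception fixup rather than a GUP walk
		 * plus kmap_atomic() round trip.
		 */
		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
		if (bytes != sizeof(frame))
			break;	/* faulted or partial copy: stop unwinding */

		perf_callchain_store(entry, frame.return_address);
		fp = frame.next_frame;
	}
}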