| field | value | date |
|---|---|---|
| author | Peter Zijlstra <peterz@infradead.org> | 2013-10-24 06:52:06 -0400 |
| committer | Ingo Molnar <mingo@kernel.org> | 2013-10-29 07:02:54 -0400 |
| commit | e00b12e64be9a34ef071de7b6052ca9ea29dd460 | |
| tree | 2f3395d06d639550039f3c9aa69c4ad0a4854327 | |
| parent | 2c42cfbfe10872929c2ba1f8130e31063ff59b94 | |
perf/x86: Further optimize copy_from_user_nmi()
Now that we can deal with nested NMIs (due to IRET re-enabling NMIs) and
can deal with faults from NMI by making sure we preserve CR2 over NMIs,
we can in fact simply access user-space memory from NMI context.

So rewrite copy_from_user_nmi() to use __copy_from_user_inatomic() and
rework the fault path to do the minimal required work before taking
the in_atomic() fault handler.

In particular, avoid perf_sw_event(), which would make perf recurse on
itself (it should be harmless, as our recursion protections should be
able to deal with this -- but why tempt fate).

Also rename notify_page_fault() to kprobes_fault(), as that is a much
better name; there is no notifier in it and it's specific to kprobes.

Don measured that his worst-case NMI path shrank from ~300K cycles to
~150K cycles.
Cc: Stephane Eranian <eranian@google.com>
Cc: jmario@redhat.com
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: dave.hansen@linux.intel.com
Tested-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131024105206.GM2490@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
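For reference, the CR2 preservation the first paragraph leans on lives in the x86 NMI entry path, not in this commit. A simplified sketch of the pattern (illustrative only; the kernel actually uses a per-CPU slot rather than a local variable so that nested NMIs work out):

```c
/*
 * Illustrative sketch of the CR2-preservation pattern in the x86 NMI
 * entry path (simplified; not part of this diff). A page fault taken
 * while handling the NMI would clobber CR2 for whatever fault the NMI
 * interrupted, so the entry code saves and restores it.
 */
dotraplinkage notrace void do_nmi(struct pt_regs *regs, long error_code)
{
	unsigned long cr2 = read_cr2();	/* save before any NMI-time fault */

	nmi_enter();
	default_do_nmi(regs);		/* may copy user memory and fault */
	nmi_exit();

	if (unlikely(cr2 != read_cr2()))
		write_cr2(cr2);		/* restore for the interrupted fault */
}
```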
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | arch/x86/lib/usercopy.c | 43 |
| -rw-r--r-- | arch/x86/mm/fault.c | 41 |

2 files changed, 36 insertions(+), 48 deletions(-)
```diff
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 4f74d94c8d97..5465b8613944 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -11,39 +11,26 @@
 #include <linux/sched.h>
 
 /*
- * best effort, GUP based copy_from_user() that is NMI-safe
+ * We rely on the nested NMI work to allow atomic faults from the NMI path; the
+ * nested NMI paths are careful to preserve CR2.
  */
 unsigned long
 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 {
-	unsigned long offset, addr = (unsigned long)from;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
+	unsigned long ret;
 
 	if (__range_not_ok(from, n, TASK_SIZE))
-		return len;
+		return 0;
 
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map);
-		put_page(page);
-
-		len += size;
-		to += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
+	/*
+	 * Even though this function is typically called from NMI/IRQ context
+	 * disable pagefaults so that its behaviour is consistent even when
+	 * called form other contexts.
+	 */
+	pagefault_disable();
+	ret = __copy_from_user_inatomic(to, from, n);
+	pagefault_enable();
+
+	return n - ret;
 }
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
```
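For context, the main consumer of copy_from_user_nmi() is perf's user-space callchain walker, which runs from the PMU NMI. A minimal caller sketch, modeled on perf_callchain_user() (the function name and frame-walk details here are illustrative, not verbatim kernel code):

```c
/* Illustrative caller of copy_from_user_nmi(); not part of this diff. */
struct stack_frame {
	struct stack_frame __user *next_frame;
	unsigned long return_address;
};

static void walk_user_stack(struct pt_regs *regs)
{
	const void __user *fp = (const void __user *)regs->bp;
	struct stack_frame frame;

	while (fp) {
		/* Returns bytes copied; a short copy means the frame was unreadable. */
		if (copy_from_user_nmi(&frame, fp, sizeof(frame)) != sizeof(frame))
			break;
		/* ... record frame.return_address; real code also bounds the walk ... */
		fp = frame.next_frame;
	}
}
```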
```diff
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3aaeffcfd67a..7a517bb41060 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -51,7 +51,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
 	return 0;
 }
 
-static inline int __kprobes notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes kprobes_fault(struct pt_regs *regs)
 {
 	int ret = 0;
 
@@ -1048,7 +1048,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 			return;
 
 		/* kprobes don't want to hook the spurious faults: */
-		if (notify_page_fault(regs))
+		if (kprobes_fault(regs))
 			return;
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -1060,23 +1060,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	}
 
 	/* kprobes don't want to hook the spurious faults: */
-	if (unlikely(notify_page_fault(regs)))
+	if (unlikely(kprobes_fault(regs)))
 		return;
-	/*
-	 * It's safe to allow irq's after cr2 has been saved and the
-	 * vmalloc fault has been handled.
-	 *
-	 * User-mode registers count as a user access even for any
-	 * potential system fault or CPU buglet:
-	 */
-	if (user_mode_vm(regs)) {
-		local_irq_enable();
-		error_code |= PF_USER;
-		flags |= FAULT_FLAG_USER;
-	} else {
-		if (regs->flags & X86_EFLAGS_IF)
-			local_irq_enable();
-	}
 
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
@@ -1088,8 +1073,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		}
 	}
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
 	/*
 	 * If we're in an interrupt, have no user context or are running
 	 * in an atomic region then we must not take the fault:
@@ -1099,6 +1082,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		return;
 	}
 
+	/*
+	 * It's safe to allow irq's after cr2 has been saved and the
+	 * vmalloc fault has been handled.
+	 *
+	 * User-mode registers count as a user access even for any
+	 * potential system fault or CPU buglet:
+	 */
+	if (user_mode_vm(regs)) {
+		local_irq_enable();
+		error_code |= PF_USER;
+		flags |= FAULT_FLAG_USER;
+	} else {
+		if (regs->flags & X86_EFLAGS_IF)
+			local_irq_enable();
+	}
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 	if (error_code & PF_WRITE)
 		flags |= FAULT_FLAG_WRITE;
 
```
