diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-05-19 04:52:27 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-05-21 05:37:56 -0400 |
commit | b7e2ecef92d2e7785e6d76b41e5ba8bcbc45259d (patch) | |
tree | 341c3a03743108bbdf82ee0eed9f4c4085f1b694 | |
parent | 1c34bde13a3cdcd4c7c6322f8052e67c2c91caf1 (diff) |
perf, trace: Optimize tracepoints by removing IRQ-disable from perf/tracepoint interaction
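Improves performance. The perf tracepoint path no longer disables IRQs
(local_irq_save()/local_irq_restore()) around the per-cpu trace buffer.
Instead of two per-cpu buffers (normal and NMI) there are now four,
selected by the recursion context returned by
perf_swevent_get_recursion_context(), and the caller's registers are
captured in an on-stack struct pt_regs rather than in the per-cpu
perf_trace_regs variable, which is removed.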
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1274259525.5605.10352.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/ftrace_event.h | 9 | ||||
-rw-r--r-- | include/trace/ftrace.h | 17 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 73 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 10 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 10 |
5 files changed, 47 insertions, 72 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index a9775dd7f7fe..126071bc90ab 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
@@ -197,20 +197,17 @@ extern void perf_trace_disable(int event_id); | |||
197 | extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, | 197 | extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, |
198 | char *filter_str); | 198 | char *filter_str); |
199 | extern void ftrace_profile_free_filter(struct perf_event *event); | 199 | extern void ftrace_profile_free_filter(struct perf_event *event); |
200 | extern void * | 200 | extern void *perf_trace_buf_prepare(int size, unsigned short type, |
201 | perf_trace_buf_prepare(int size, unsigned short type, int *rctxp, | 201 | struct pt_regs *regs, int *rctxp); |
202 | unsigned long *irq_flags); | ||
203 | 202 | ||
204 | static inline void | 203 | static inline void |
205 | perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, | 204 | perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, |
206 | u64 count, unsigned long irq_flags, struct pt_regs *regs, | 205 | u64 count, struct pt_regs *regs, void *event) |
207 | void *event) | ||
208 | { | 206 | { |
209 | struct trace_entry *entry = raw_data; | 207 | struct trace_entry *entry = raw_data; |
210 | 208 | ||
211 | perf_tp_event(entry->type, addr, count, raw_data, size, regs, event); | 209 | perf_tp_event(entry->type, addr, count, raw_data, size, regs, event); |
212 | perf_swevent_put_recursion_context(rctx); | 210 | perf_swevent_put_recursion_context(rctx); |
213 | local_irq_restore(irq_flags); | ||
214 | } | 211 | } |
215 | #endif | 212 | #endif |
216 | 213 | ||
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1016b2162935..f282885057dd 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
@@ -768,7 +768,6 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ | |||
768 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ | 768 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ |
769 | struct ftrace_raw_##call *entry; \ | 769 | struct ftrace_raw_##call *entry; \ |
770 | u64 __addr = 0, __count = 1; \ | 770 | u64 __addr = 0, __count = 1; \ |
771 | unsigned long irq_flags; \ | ||
772 | int __entry_size; \ | 771 | int __entry_size; \ |
773 | int __data_size; \ | 772 | int __data_size; \ |
774 | int rctx; \ | 773 | int rctx; \ |
@@ -781,17 +780,18 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ | |||
781 | if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE, \ | 780 | if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE, \ |
782 | "profile buffer not large enough")) \ | 781 | "profile buffer not large enough")) \ |
783 | return; \ | 782 | return; \ |
783 | \ | ||
784 | entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare( \ | 784 | entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare( \ |
785 | __entry_size, event_call->id, &rctx, &irq_flags); \ | 785 | __entry_size, event_call->id, __regs, &rctx); \ |
786 | if (!entry) \ | 786 | if (!entry) \ |
787 | return; \ | 787 | return; \ |
788 | \ | ||
788 | tstruct \ | 789 | tstruct \ |
789 | \ | 790 | \ |
790 | { assign; } \ | 791 | { assign; } \ |
791 | \ | 792 | \ |
792 | perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ | 793 | perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ |
793 | __count, irq_flags, __regs, \ | 794 | __count, __regs, event_call->perf_data); \ |
794 | event_call->perf_data); \ | ||
795 | } | 795 | } |
796 | 796 | ||
797 | #undef DEFINE_EVENT | 797 | #undef DEFINE_EVENT |
@@ -799,13 +799,10 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ | |||
799 | static notrace void perf_trace_##call(proto) \ | 799 | static notrace void perf_trace_##call(proto) \ |
800 | { \ | 800 | { \ |
801 | struct ftrace_event_call *event_call = &event_##call; \ | 801 | struct ftrace_event_call *event_call = &event_##call; \ |
802 | struct pt_regs *__regs = &get_cpu_var(perf_trace_regs); \ | 802 | struct pt_regs __regs; \ |
803 | \ | ||
804 | perf_fetch_caller_regs(__regs, 1); \ | ||
805 | \ | ||
806 | perf_trace_templ_##template(event_call, __regs, args); \ | ||
807 | \ | 803 | \ |
808 | put_cpu_var(perf_trace_regs); \ | 804 | perf_fetch_caller_regs(&__regs, 1); \ |
805 | perf_trace_templ_##template(event_call, &__regs, args); \ | ||
809 | } | 806 | } |
810 | 807 | ||
811 | #undef DEFINE_EVENT_PRINT | 808 | #undef DEFINE_EVENT_PRINT |
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 89b780a7c522..a1304f8c4440 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -9,13 +9,9 @@ | |||
9 | #include <linux/kprobes.h> | 9 | #include <linux/kprobes.h> |
10 | #include "trace.h" | 10 | #include "trace.h" |
11 | 11 | ||
12 | DEFINE_PER_CPU(struct pt_regs, perf_trace_regs); | ||
13 | EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs); | ||
14 | |||
15 | EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); | 12 | EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); |
16 | 13 | ||
17 | static char *perf_trace_buf; | 14 | static char *perf_trace_buf[4]; |
18 | static char *perf_trace_buf_nmi; | ||
19 | 15 | ||
20 | /* | 16 | /* |
21 | * Force it to be aligned to unsigned long to avoid misaligned accesses | 17 | * Force it to be aligned to unsigned long to avoid misaligned accesses |
@@ -29,7 +25,6 @@ static int total_ref_count; | |||
29 | 25 | ||
30 | static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) | 26 | static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) |
31 | { | 27 | { |
32 | char *buf; | ||
33 | int ret = -ENOMEM; | 28 | int ret = -ENOMEM; |
34 | 29 | ||
35 | if (event->perf_refcount++ > 0) { | 30 | if (event->perf_refcount++ > 0) { |
@@ -38,17 +33,16 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) | |||
38 | } | 33 | } |
39 | 34 | ||
40 | if (!total_ref_count) { | 35 | if (!total_ref_count) { |
41 | buf = (char *)alloc_percpu(perf_trace_t); | 36 | char *buf; |
42 | if (!buf) | 37 | int i; |
43 | goto fail_buf; | ||
44 | |||
45 | rcu_assign_pointer(perf_trace_buf, buf); | ||
46 | 38 | ||
47 | buf = (char *)alloc_percpu(perf_trace_t); | 39 | for (i = 0; i < 4; i++) { |
48 | if (!buf) | 40 | buf = (char *)alloc_percpu(perf_trace_t); |
49 | goto fail_buf_nmi; | 41 | if (!buf) |
42 | goto fail_buf; | ||
50 | 43 | ||
51 | rcu_assign_pointer(perf_trace_buf_nmi, buf); | 44 | rcu_assign_pointer(perf_trace_buf[i], buf); |
45 | } | ||
52 | } | 46 | } |
53 | 47 | ||
54 | ret = event->perf_event_enable(event); | 48 | ret = event->perf_event_enable(event); |
@@ -58,14 +52,15 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) | |||
58 | return 0; | 52 | return 0; |
59 | } | 53 | } |
60 | 54 | ||
61 | fail_buf_nmi: | 55 | fail_buf: |
62 | if (!total_ref_count) { | 56 | if (!total_ref_count) { |
63 | free_percpu(perf_trace_buf_nmi); | 57 | int i; |
64 | free_percpu(perf_trace_buf); | 58 | |
65 | perf_trace_buf_nmi = NULL; | 59 | for (i = 0; i < 4; i++) { |
66 | perf_trace_buf = NULL; | 60 | free_percpu(perf_trace_buf[i]); |
61 | perf_trace_buf[i] = NULL; | ||
62 | } | ||
67 | } | 63 | } |
68 | fail_buf: | ||
69 | event->perf_refcount--; | 64 | event->perf_refcount--; |
70 | 65 | ||
71 | return ret; | 66 | return ret; |
@@ -91,19 +86,19 @@ int perf_trace_enable(int event_id, void *data) | |||
91 | 86 | ||
92 | static void perf_trace_event_disable(struct ftrace_event_call *event) | 87 | static void perf_trace_event_disable(struct ftrace_event_call *event) |
93 | { | 88 | { |
94 | char *buf, *nmi_buf; | ||
95 | |||
96 | if (--event->perf_refcount > 0) | 89 | if (--event->perf_refcount > 0) |
97 | return; | 90 | return; |
98 | 91 | ||
99 | event->perf_event_disable(event); | 92 | event->perf_event_disable(event); |
100 | 93 | ||
101 | if (!--total_ref_count) { | 94 | if (!--total_ref_count) { |
102 | buf = perf_trace_buf; | 95 | char *buf[4]; |
103 | rcu_assign_pointer(perf_trace_buf, NULL); | 96 | int i; |
104 | 97 | ||
105 | nmi_buf = perf_trace_buf_nmi; | 98 | for (i = 0; i < 4; i++) { |
106 | rcu_assign_pointer(perf_trace_buf_nmi, NULL); | 99 | buf[i] = perf_trace_buf[i]; |
100 | rcu_assign_pointer(perf_trace_buf[i], NULL); | ||
101 | } | ||
107 | 102 | ||
108 | /* | 103 | /* |
109 | * Ensure every events in profiling have finished before | 104 | * Ensure every events in profiling have finished before |
@@ -111,8 +106,8 @@ static void perf_trace_event_disable(struct ftrace_event_call *event) | |||
111 | */ | 106 | */ |
112 | synchronize_sched(); | 107 | synchronize_sched(); |
113 | 108 | ||
114 | free_percpu(buf); | 109 | for (i = 0; i < 4; i++) |
115 | free_percpu(nmi_buf); | 110 | free_percpu(buf[i]); |
116 | } | 111 | } |
117 | } | 112 | } |
118 | 113 | ||
@@ -132,47 +127,37 @@ void perf_trace_disable(int event_id) | |||
132 | } | 127 | } |
133 | 128 | ||
134 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | 129 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, |
135 | int *rctxp, unsigned long *irq_flags) | 130 | struct pt_regs *regs, int *rctxp) |
136 | { | 131 | { |
137 | struct trace_entry *entry; | 132 | struct trace_entry *entry; |
138 | char *trace_buf, *raw_data; | 133 | char *trace_buf, *raw_data; |
139 | int pc, cpu; | 134 | int pc; |
140 | 135 | ||
141 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); | 136 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); |
142 | 137 | ||
143 | pc = preempt_count(); | 138 | pc = preempt_count(); |
144 | 139 | ||
145 | /* Protect the per cpu buffer, begin the rcu read side */ | ||
146 | local_irq_save(*irq_flags); | ||
147 | |||
148 | *rctxp = perf_swevent_get_recursion_context(); | 140 | *rctxp = perf_swevent_get_recursion_context(); |
149 | if (*rctxp < 0) | 141 | if (*rctxp < 0) |
150 | goto err_recursion; | 142 | goto err_recursion; |
151 | 143 | ||
152 | cpu = smp_processor_id(); | 144 | trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]); |
153 | |||
154 | if (in_nmi()) | ||
155 | trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); | ||
156 | else | ||
157 | trace_buf = rcu_dereference_sched(perf_trace_buf); | ||
158 | |||
159 | if (!trace_buf) | 145 | if (!trace_buf) |
160 | goto err; | 146 | goto err; |
161 | 147 | ||
162 | raw_data = per_cpu_ptr(trace_buf, cpu); | 148 | raw_data = per_cpu_ptr(trace_buf, smp_processor_id()); |
163 | 149 | ||
164 | /* zero the dead bytes from align to not leak stack to user */ | 150 | /* zero the dead bytes from align to not leak stack to user */ |
165 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); | 151 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); |
166 | 152 | ||
167 | entry = (struct trace_entry *)raw_data; | 153 | entry = (struct trace_entry *)raw_data; |
168 | tracing_generic_entry_update(entry, *irq_flags, pc); | 154 | tracing_generic_entry_update(entry, regs->flags, pc); |
169 | entry->type = type; | 155 | entry->type = type; |
170 | 156 | ||
171 | return raw_data; | 157 | return raw_data; |
172 | err: | 158 | err: |
173 | perf_swevent_put_recursion_context(*rctxp); | 159 | perf_swevent_put_recursion_context(*rctxp); |
174 | err_recursion: | 160 | err_recursion: |
175 | local_irq_restore(*irq_flags); | ||
176 | return NULL; | 161 | return NULL; |
177 | } | 162 | } |
178 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); | 163 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 2d7bf4146be8..20c96de0aea0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1343,7 +1343,6 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
1343 | struct kprobe_trace_entry_head *entry; | 1343 | struct kprobe_trace_entry_head *entry; |
1344 | u8 *data; | 1344 | u8 *data; |
1345 | int size, __size, i; | 1345 | int size, __size, i; |
1346 | unsigned long irq_flags; | ||
1347 | int rctx; | 1346 | int rctx; |
1348 | 1347 | ||
1349 | __size = sizeof(*entry) + tp->size; | 1348 | __size = sizeof(*entry) + tp->size; |
@@ -1353,7 +1352,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
1353 | "profile buffer not large enough")) | 1352 | "profile buffer not large enough")) |
1354 | return; | 1353 | return; |
1355 | 1354 | ||
1356 | entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); | 1355 | entry = perf_trace_buf_prepare(size, call->id, regs, &rctx); |
1357 | if (!entry) | 1356 | if (!entry) |
1358 | return; | 1357 | return; |
1359 | 1358 | ||
@@ -1362,7 +1361,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
1362 | for (i = 0; i < tp->nr_args; i++) | 1361 | for (i = 0; i < tp->nr_args; i++) |
1363 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); | 1362 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); |
1364 | 1363 | ||
1365 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs, call->perf_data); | 1364 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data); |
1366 | } | 1365 | } |
1367 | 1366 | ||
1368 | /* Kretprobe profile handler */ | 1367 | /* Kretprobe profile handler */ |
@@ -1374,7 +1373,6 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1374 | struct kretprobe_trace_entry_head *entry; | 1373 | struct kretprobe_trace_entry_head *entry; |
1375 | u8 *data; | 1374 | u8 *data; |
1376 | int size, __size, i; | 1375 | int size, __size, i; |
1377 | unsigned long irq_flags; | ||
1378 | int rctx; | 1376 | int rctx; |
1379 | 1377 | ||
1380 | __size = sizeof(*entry) + tp->size; | 1378 | __size = sizeof(*entry) + tp->size; |
@@ -1384,7 +1382,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1384 | "profile buffer not large enough")) | 1382 | "profile buffer not large enough")) |
1385 | return; | 1383 | return; |
1386 | 1384 | ||
1387 | entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); | 1385 | entry = perf_trace_buf_prepare(size, call->id, regs, &rctx); |
1388 | if (!entry) | 1386 | if (!entry) |
1389 | return; | 1387 | return; |
1390 | 1388 | ||
@@ -1395,7 +1393,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1395 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); | 1393 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); |
1396 | 1394 | ||
1397 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, | 1395 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, |
1398 | irq_flags, regs, call->perf_data); | 1396 | regs, call->perf_data); |
1399 | } | 1397 | } |
1400 | 1398 | ||
1401 | static int probe_perf_enable(struct ftrace_event_call *call) | 1399 | static int probe_perf_enable(struct ftrace_event_call *call) |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9eff1a4b49b9..a657cefbb137 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -438,7 +438,6 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) | |||
438 | { | 438 | { |
439 | struct syscall_metadata *sys_data; | 439 | struct syscall_metadata *sys_data; |
440 | struct syscall_trace_enter *rec; | 440 | struct syscall_trace_enter *rec; |
441 | unsigned long flags; | ||
442 | int syscall_nr; | 441 | int syscall_nr; |
443 | int rctx; | 442 | int rctx; |
444 | int size; | 443 | int size; |
@@ -461,14 +460,14 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) | |||
461 | return; | 460 | return; |
462 | 461 | ||
463 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, | 462 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, |
464 | sys_data->enter_event->id, &rctx, &flags); | 463 | sys_data->enter_event->id, regs, &rctx); |
465 | if (!rec) | 464 | if (!rec) |
466 | return; | 465 | return; |
467 | 466 | ||
468 | rec->nr = syscall_nr; | 467 | rec->nr = syscall_nr; |
469 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 468 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, |
470 | (unsigned long *)&rec->args); | 469 | (unsigned long *)&rec->args); |
471 | perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs, | 470 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, |
472 | sys_data->enter_event->perf_data); | 471 | sys_data->enter_event->perf_data); |
473 | } | 472 | } |
474 | 473 | ||
@@ -511,7 +510,6 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) | |||
511 | { | 510 | { |
512 | struct syscall_metadata *sys_data; | 511 | struct syscall_metadata *sys_data; |
513 | struct syscall_trace_exit *rec; | 512 | struct syscall_trace_exit *rec; |
514 | unsigned long flags; | ||
515 | int syscall_nr; | 513 | int syscall_nr; |
516 | int rctx; | 514 | int rctx; |
517 | int size; | 515 | int size; |
@@ -537,14 +535,14 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) | |||
537 | return; | 535 | return; |
538 | 536 | ||
539 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, | 537 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, |
540 | sys_data->exit_event->id, &rctx, &flags); | 538 | sys_data->exit_event->id, regs, &rctx); |
541 | if (!rec) | 539 | if (!rec) |
542 | return; | 540 | return; |
543 | 541 | ||
544 | rec->nr = syscall_nr; | 542 | rec->nr = syscall_nr; |
545 | rec->ret = syscall_get_return_value(current, regs); | 543 | rec->ret = syscall_get_return_value(current, regs); |
546 | 544 | ||
547 | perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs, | 545 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, |
548 | sys_data->exit_event->perf_data); | 546 | sys_data->exit_event->perf_data); |
549 | } | 547 | } |
550 | 548 | ||