author     Peter Zijlstra <a.p.zijlstra@chello.nl>   2010-05-19 04:52:27 -0400
committer  Ingo Molnar <mingo@elte.hu>               2010-05-21 05:37:56 -0400
commit     b7e2ecef92d2e7785e6d76b41e5ba8bcbc45259d (patch)
tree       341c3a03743108bbdf82ee0eed9f4c4085f1b694 /kernel/trace/trace_event_perf.c
parent     1c34bde13a3cdcd4c7c6322f8052e67c2c91caf1 (diff)
perf, trace: Optimize tracepoints by removing IRQ-disable from perf/tracepoint interaction
Improves performance.
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1274259525.5605.10352.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace/trace_event_perf.c')
-rw-r--r--  kernel/trace/trace_event_perf.c  73
1 file changed, 29 insertions(+), 44 deletions(-)
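
In short, the patch replaces the two global scratch buffers (perf_trace_buf
for normal context, perf_trace_buf_nmi for NMI) and the
local_irq_save()/local_irq_restore() pair that protected them with an array
of four buffers indexed by the perf swevent recursion context. A tracepoint
firing from a nested context (softirq, hardirq, NMI) gets its own buffer
instead of racing with the one below it, so the fast path no longer needs to
disable interrupts. Below is a minimal user-space sketch of that indexing
idea; NR_CONTEXTS, current_context() and the buffer layout are illustrative
stand-ins, not kernel API.

    #include <stdio.h>
    #include <string.h>

    /*
     * One scratch buffer per execution-context level. In the kernel the
     * level comes from perf_swevent_get_recursion_context(), which
     * distinguishes (roughly) task, softirq, hardirq and NMI context --
     * hence the four buffers in this patch.
     */
    #define NR_CONTEXTS 4

    static char scratch[NR_CONTEXTS][128];

    /* Stand-in for perf_swevent_get_recursion_context(). */
    static int current_context(int nesting_level)
    {
            return nesting_level;   /* 0 = task-like, 3 = NMI-like */
    }

    int main(void)
    {
            /* An event at task level, and another "nested" above it. */
            int outer = current_context(0);
            int inner = current_context(2);

            strcpy(scratch[outer], "outer event record");
            strcpy(scratch[inner], "nested event record"); /* distinct buffer */

            printf("%s / %s\n", scratch[outer], scratch[inner]);
            return 0;
    }
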
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 89b780a7c522..a1304f8c4440 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,13 +9,9 @@
 #include <linux/kprobes.h>
 #include "trace.h"
 
-DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
-EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
-
 EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
 
-static char *perf_trace_buf;
-static char *perf_trace_buf_nmi;
+static char *perf_trace_buf[4];
 
 /*
  * Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -29,7 +25,6 @@ static int total_ref_count;
 
 static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
 {
-	char *buf;
 	int ret = -ENOMEM;
 
 	if (event->perf_refcount++ > 0) {
@@ -38,17 +33,16 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
 	}
 
 	if (!total_ref_count) {
-		buf = (char *)alloc_percpu(perf_trace_t);
-		if (!buf)
-			goto fail_buf;
-
-		rcu_assign_pointer(perf_trace_buf, buf);
+		char *buf;
+		int i;
 
-		buf = (char *)alloc_percpu(perf_trace_t);
-		if (!buf)
-			goto fail_buf_nmi;
+		for (i = 0; i < 4; i++) {
+			buf = (char *)alloc_percpu(perf_trace_t);
+			if (!buf)
+				goto fail_buf;
 
-		rcu_assign_pointer(perf_trace_buf_nmi, buf);
+			rcu_assign_pointer(perf_trace_buf[i], buf);
+		}
 	}
 
 	ret = event->perf_event_enable(event);
@@ -58,14 +52,15 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
 	return 0;
 }
 
-fail_buf_nmi:
+fail_buf:
 	if (!total_ref_count) {
-		free_percpu(perf_trace_buf_nmi);
-		free_percpu(perf_trace_buf);
-		perf_trace_buf_nmi = NULL;
-		perf_trace_buf = NULL;
+		int i;
+
+		for (i = 0; i < 4; i++) {
+			free_percpu(perf_trace_buf[i]);
+			perf_trace_buf[i] = NULL;
+		}
 	}
-fail_buf:
 	event->perf_refcount--;
 
 	return ret;
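
A note on the error path in the two hunks above: the allocation loop and
the fail_buf unwind walk the same four slots, and any slot whose allocation
never ran still holds NULL, which free_percpu() silently ignores, so the
unwind needs no record of how far the allocation got. A small user-space
sketch of the same allocate-all-or-unwind idiom, with malloc()/free()
standing in for alloc_percpu()/free_percpu():

    #include <stdlib.h>

    #define NR_SLOTS 4

    static char *slot[NR_SLOTS];    /* static storage: initially all NULL */

    static int enable_all(size_t size)
    {
            int i;

            for (i = 0; i < NR_SLOTS; i++) {
                    slot[i] = malloc(size);
                    if (!slot[i])
                            goto fail;
            }
            return 0;

    fail:
            /* Slots that never got allocated are still NULL; freeing
             * NULL is a no-op, so one loop unwinds everything. */
            for (i = 0; i < NR_SLOTS; i++) {
                    free(slot[i]);
                    slot[i] = NULL;
            }
            return -1;
    }

    int main(void)
    {
            return enable_all(128) ? 1 : 0;
    }
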
@@ -91,19 +86,19 @@ int perf_trace_enable(int event_id, void *data)
 
 static void perf_trace_event_disable(struct ftrace_event_call *event)
 {
-	char *buf, *nmi_buf;
-
 	if (--event->perf_refcount > 0)
 		return;
 
 	event->perf_event_disable(event);
 
 	if (!--total_ref_count) {
-		buf = perf_trace_buf;
-		rcu_assign_pointer(perf_trace_buf, NULL);
+		char *buf[4];
+		int i;
 
-		nmi_buf = perf_trace_buf_nmi;
-		rcu_assign_pointer(perf_trace_buf_nmi, NULL);
+		for (i = 0; i < 4; i++) {
+			buf[i] = perf_trace_buf[i];
+			rcu_assign_pointer(perf_trace_buf[i], NULL);
+		}
 
 		/*
 		 * Ensure every events in profiling have finished before
@@ -111,8 +106,8 @@ static void perf_trace_event_disable(struct ftrace_event_call *event)
 		 */
 		synchronize_sched();
 
-		free_percpu(buf);
-		free_percpu(nmi_buf);
+		for (i = 0; i < 4; i++)
+			free_percpu(buf[i]);
 	}
 }
 
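The disable path in the two hunks above follows the usual RCU teardown
order: unpublish each pointer with rcu_assign_pointer(..., NULL), wait out
the readers with synchronize_sched(), and only then free the memory. A
minimal sketch of that unpublish/quiesce/free ordering, where
wait_for_readers() is a stand-in for synchronize_sched():

    #include <stdlib.h>

    static char *shared_buf;        /* pointer readers dereference */

    /* Stand-in for synchronize_sched(): in the kernel this blocks until
     * every reader that might still see the old pointer has finished. */
    static void wait_for_readers(void)
    {
    }

    static void teardown(void)
    {
            char *old = shared_buf;

            shared_buf = NULL;      /* 1. unpublish: new readers see NULL */
            wait_for_readers();     /* 2. quiesce: old readers drain      */
            free(old);              /* 3. now nobody can touch the memory */
    }

    int main(void)
    {
            shared_buf = malloc(64);
            teardown();
            return 0;
    }
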
@@ -132,47 +127,37 @@ void perf_trace_disable(int event_id)
 }
 
 __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
-				       int *rctxp, unsigned long *irq_flags)
+				       struct pt_regs *regs, int *rctxp)
 {
 	struct trace_entry *entry;
 	char *trace_buf, *raw_data;
-	int pc, cpu;
+	int pc;
 
 	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
 
 	pc = preempt_count();
 
-	/* Protect the per cpu buffer, begin the rcu read side */
-	local_irq_save(*irq_flags);
-
 	*rctxp = perf_swevent_get_recursion_context();
 	if (*rctxp < 0)
 		goto err_recursion;
 
-	cpu = smp_processor_id();
-
-	if (in_nmi())
-		trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
-	else
-		trace_buf = rcu_dereference_sched(perf_trace_buf);
-
+	trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]);
 	if (!trace_buf)
 		goto err;
 
-	raw_data = per_cpu_ptr(trace_buf, cpu);
+	raw_data = per_cpu_ptr(trace_buf, smp_processor_id());
 
 	/* zero the dead bytes from align to not leak stack to user */
 	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
 
 	entry = (struct trace_entry *)raw_data;
-	tracing_generic_entry_update(entry, *irq_flags, pc);
+	tracing_generic_entry_update(entry, regs->flags, pc);
 	entry->type = type;
 
 	return raw_data;
 err:
 	perf_swevent_put_recursion_context(*rctxp);
 err_recursion:
-	local_irq_restore(*irq_flags);
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
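
Finally, the last hunk changes the caller contract of
perf_trace_buf_prepare(): callers now pass a struct pt_regs * (whose flags
field feeds tracing_generic_entry_update()) instead of an unsigned long *
for saved IRQ flags, and the recursion context returned through rctxp both
selects the buffer and must be released with
perf_swevent_put_recursion_context() once the record is submitted. The
user-space mock below sketches that acquire/use/release shape; get_ctx(),
put_ctx() and prepare() are illustrative stand-ins for the kernel
functions, and the per-level busy flag only approximates how the kernel
refuses recursion.

    #include <stdio.h>

    #define NR_CONTEXTS 4

    static char bufs[NR_CONTEXTS][64];
    static int in_use[NR_CONTEXTS];

    /* Stand-in for perf_swevent_get_recursion_context(): claim the
     * buffer for this context level, or refuse if already inside it. */
    static int get_ctx(int level)
    {
            if (in_use[level])
                    return -1;      /* recursion at this level: bail out */
            in_use[level] = 1;
            return level;
    }

    /* Stand-in for perf_swevent_put_recursion_context(). */
    static void put_ctx(int rctx)
    {
            in_use[rctx] = 0;
    }

    /* Stand-in for perf_trace_buf_prepare(): pick the rctx-indexed buffer. */
    static char *prepare(int level, int *rctxp)
    {
            *rctxp = get_ctx(level);
            if (*rctxp < 0)
                    return NULL;
            return bufs[*rctxp];
    }

    int main(void)
    {
            int rctx;
            char *rec = prepare(0, &rctx);

            if (rec) {
                    snprintf(rec, sizeof(bufs[0]), "record in context %d", rctx);
                    puts(rec);
                    put_ctx(rctx);  /* always release after submitting */
            }
            return 0;
    }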