diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-05-19 08:02:22 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-05-21 05:37:56 -0400 |
commit | 1c024eca51fdc965290acf342ae16a476c2189d0 (patch) | |
tree | 28dc160cc70a20eeb8b8825d6d52ea88a6188413 /kernel/trace/trace_event_perf.c | |
parent | b7e2ecef92d2e7785e6d76b41e5ba8bcbc45259d (diff) |
perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events
Avoid the swevent hash-table by keeping, for each tracepoint, a per-cpu
hlist of the perf events attached to it.
Also, avoid conditionals on the fast path by ordering the freeing of
the per-cpu data after probe unregister, so that we should never get on
the callback path without the data being there.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.473188012@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace/trace_event_perf.c')
-rw-r--r-- | kernel/trace/trace_event_perf.c | 127 |
1 files changed, 67 insertions, 60 deletions
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a1304f8c4440..39d5ea7b0653 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -23,14 +23,25 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) | |||
23 | /* Count the events in use (per event id, not per instance) */ | 23 | /* Count the events in use (per event id, not per instance) */ |
24 | static int total_ref_count; | 24 | static int total_ref_count; |
25 | 25 | ||
26 | static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) | 26 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, |
27 | struct perf_event *p_event) | ||
27 | { | 28 | { |
29 | struct hlist_head *list; | ||
28 | int ret = -ENOMEM; | 30 | int ret = -ENOMEM; |
31 | int cpu; | ||
29 | 32 | ||
30 | if (event->perf_refcount++ > 0) { | 33 | p_event->tp_event = tp_event; |
31 | event->perf_data = NULL; | 34 | if (tp_event->perf_refcount++ > 0) |
32 | return 0; | 35 | return 0; |
33 | } | 36 | |
37 | list = alloc_percpu(struct hlist_head); | ||
38 | if (!list) | ||
39 | goto fail; | ||
40 | |||
41 | for_each_possible_cpu(cpu) | ||
42 | INIT_HLIST_HEAD(per_cpu_ptr(list, cpu)); | ||
43 | |||
44 | tp_event->perf_events = list; | ||
34 | 45 | ||
35 | if (!total_ref_count) { | 46 | if (!total_ref_count) { |
36 | char *buf; | 47 | char *buf; |
@@ -39,20 +50,20 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) | |||
39 | for (i = 0; i < 4; i++) { | 50 | for (i = 0; i < 4; i++) { |
40 | buf = (char *)alloc_percpu(perf_trace_t); | 51 | buf = (char *)alloc_percpu(perf_trace_t); |
41 | if (!buf) | 52 | if (!buf) |
42 | goto fail_buf; | 53 | goto fail; |
43 | 54 | ||
44 | rcu_assign_pointer(perf_trace_buf[i], buf); | 55 | perf_trace_buf[i] = buf; |
45 | } | 56 | } |
46 | } | 57 | } |
47 | 58 | ||
48 | ret = event->perf_event_enable(event); | 59 | ret = tp_event->perf_event_enable(tp_event); |
49 | if (!ret) { | 60 | if (ret) |
50 | event->perf_data = data; | 61 | goto fail; |
51 | total_ref_count++; | ||
52 | return 0; | ||
53 | } | ||
54 | 62 | ||
55 | fail_buf: | 63 | total_ref_count++; |
64 | return 0; | ||
65 | |||
66 | fail: | ||
56 | if (!total_ref_count) { | 67 | if (!total_ref_count) { |
57 | int i; | 68 | int i; |
58 | 69 | ||
@@ -61,21 +72,26 @@ fail_buf: | |||
61 | perf_trace_buf[i] = NULL; | 72 | perf_trace_buf[i] = NULL; |
62 | } | 73 | } |
63 | } | 74 | } |
64 | event->perf_refcount--; | 75 | |
76 | if (!--tp_event->perf_refcount) { | ||
77 | free_percpu(tp_event->perf_events); | ||
78 | tp_event->perf_events = NULL; | ||
79 | } | ||
65 | 80 | ||
66 | return ret; | 81 | return ret; |
67 | } | 82 | } |
68 | 83 | ||
69 | int perf_trace_enable(int event_id, void *data) | 84 | int perf_trace_init(struct perf_event *p_event) |
70 | { | 85 | { |
71 | struct ftrace_event_call *event; | 86 | struct ftrace_event_call *tp_event; |
87 | int event_id = p_event->attr.config; | ||
72 | int ret = -EINVAL; | 88 | int ret = -EINVAL; |
73 | 89 | ||
74 | mutex_lock(&event_mutex); | 90 | mutex_lock(&event_mutex); |
75 | list_for_each_entry(event, &ftrace_events, list) { | 91 | list_for_each_entry(tp_event, &ftrace_events, list) { |
76 | if (event->id == event_id && event->perf_event_enable && | 92 | if (tp_event->id == event_id && tp_event->perf_event_enable && |
77 | try_module_get(event->mod)) { | 93 | try_module_get(tp_event->mod)) { |
78 | ret = perf_trace_event_enable(event, data); | 94 | ret = perf_trace_event_init(tp_event, p_event); |
79 | break; | 95 | break; |
80 | } | 96 | } |
81 | } | 97 | } |
@@ -84,53 +100,52 @@ int perf_trace_enable(int event_id, void *data) | |||
84 | return ret; | 100 | return ret; |
85 | } | 101 | } |
86 | 102 | ||
87 | static void perf_trace_event_disable(struct ftrace_event_call *event) | 103 | int perf_trace_enable(struct perf_event *p_event) |
88 | { | 104 | { |
89 | if (--event->perf_refcount > 0) | 105 | struct ftrace_event_call *tp_event = p_event->tp_event; |
90 | return; | 106 | struct hlist_head *list; |
91 | 107 | ||
92 | event->perf_event_disable(event); | 108 | list = tp_event->perf_events; |
109 | if (WARN_ON_ONCE(!list)) | ||
110 | return -EINVAL; | ||
93 | 111 | ||
94 | if (!--total_ref_count) { | 112 | list = per_cpu_ptr(list, smp_processor_id()); |
95 | char *buf[4]; | 113 | hlist_add_head_rcu(&p_event->hlist_entry, list); |
96 | int i; | ||
97 | |||
98 | for (i = 0; i < 4; i++) { | ||
99 | buf[i] = perf_trace_buf[i]; | ||
100 | rcu_assign_pointer(perf_trace_buf[i], NULL); | ||
101 | } | ||
102 | 114 | ||
103 | /* | 115 | return 0; |
104 | * Ensure every events in profiling have finished before | 116 | } |
105 | * releasing the buffers | ||
106 | */ | ||
107 | synchronize_sched(); | ||
108 | 117 | ||
109 | for (i = 0; i < 4; i++) | 118 | void perf_trace_disable(struct perf_event *p_event) |
110 | free_percpu(buf[i]); | 119 | { |
111 | } | 120 | hlist_del_rcu(&p_event->hlist_entry); |
112 | } | 121 | } |
113 | 122 | ||
114 | void perf_trace_disable(int event_id) | 123 | void perf_trace_destroy(struct perf_event *p_event) |
115 | { | 124 | { |
116 | struct ftrace_event_call *event; | 125 | struct ftrace_event_call *tp_event = p_event->tp_event; |
126 | int i; | ||
117 | 127 | ||
118 | mutex_lock(&event_mutex); | 128 | if (--tp_event->perf_refcount > 0) |
119 | list_for_each_entry(event, &ftrace_events, list) { | 129 | return; |
120 | if (event->id == event_id) { | 130 | |
121 | perf_trace_event_disable(event); | 131 | tp_event->perf_event_disable(tp_event); |
122 | module_put(event->mod); | 132 | |
123 | break; | 133 | free_percpu(tp_event->perf_events); |
134 | tp_event->perf_events = NULL; | ||
135 | |||
136 | if (!--total_ref_count) { | ||
137 | for (i = 0; i < 4; i++) { | ||
138 | free_percpu(perf_trace_buf[i]); | ||
139 | perf_trace_buf[i] = NULL; | ||
124 | } | 140 | } |
125 | } | 141 | } |
126 | mutex_unlock(&event_mutex); | ||
127 | } | 142 | } |
128 | 143 | ||
129 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | 144 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, |
130 | struct pt_regs *regs, int *rctxp) | 145 | struct pt_regs *regs, int *rctxp) |
131 | { | 146 | { |
132 | struct trace_entry *entry; | 147 | struct trace_entry *entry; |
133 | char *trace_buf, *raw_data; | 148 | char *raw_data; |
134 | int pc; | 149 | int pc; |
135 | 150 | ||
136 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); | 151 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); |
@@ -139,13 +154,9 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
139 | 154 | ||
140 | *rctxp = perf_swevent_get_recursion_context(); | 155 | *rctxp = perf_swevent_get_recursion_context(); |
141 | if (*rctxp < 0) | 156 | if (*rctxp < 0) |
142 | goto err_recursion; | 157 | return NULL; |
143 | |||
144 | trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]); | ||
145 | if (!trace_buf) | ||
146 | goto err; | ||
147 | 158 | ||
148 | raw_data = per_cpu_ptr(trace_buf, smp_processor_id()); | 159 | raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id()); |
149 | 160 | ||
150 | /* zero the dead bytes from align to not leak stack to user */ | 161 | /* zero the dead bytes from align to not leak stack to user */ |
151 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); | 162 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); |
@@ -155,9 +166,5 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
155 | entry->type = type; | 166 | entry->type = type; |
156 | 167 | ||
157 | return raw_data; | 168 | return raw_data; |
158 | err: | ||
159 | perf_swevent_put_recursion_context(*rctxp); | ||
160 | err_recursion: | ||
161 | return NULL; | ||
162 | } | 169 | } |
163 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); | 170 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); |