diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-05-19 08:02:22 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-05-21 05:37:56 -0400 |
commit | 1c024eca51fdc965290acf342ae16a476c2189d0 (patch) | |
tree | 28dc160cc70a20eeb8b8825d6d52ea88a6188413 /kernel/trace | |
parent | b7e2ecef92d2e7785e6d76b41e5ba8bcbc45259d (diff) |
perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events
Avoid the swevent hash-table by using per-tracepoint
hlists.
Also, avoid conditionals on the fast path by ordering
with probe unregister so that we should never get on
the callback path without the data being there.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.473188012@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/trace_event_perf.c | 127 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 9 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 11 |
3 files changed, 80 insertions, 67 deletions
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a1304f8c4440..39d5ea7b0653 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -23,14 +23,25 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) | |||
23 | /* Count the events in use (per event id, not per instance) */ | 23 | /* Count the events in use (per event id, not per instance) */ |
24 | static int total_ref_count; | 24 | static int total_ref_count; |
25 | 25 | ||
26 | static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) | 26 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, |
27 | struct perf_event *p_event) | ||
27 | { | 28 | { |
29 | struct hlist_head *list; | ||
28 | int ret = -ENOMEM; | 30 | int ret = -ENOMEM; |
31 | int cpu; | ||
29 | 32 | ||
30 | if (event->perf_refcount++ > 0) { | 33 | p_event->tp_event = tp_event; |
31 | event->perf_data = NULL; | 34 | if (tp_event->perf_refcount++ > 0) |
32 | return 0; | 35 | return 0; |
33 | } | 36 | |
37 | list = alloc_percpu(struct hlist_head); | ||
38 | if (!list) | ||
39 | goto fail; | ||
40 | |||
41 | for_each_possible_cpu(cpu) | ||
42 | INIT_HLIST_HEAD(per_cpu_ptr(list, cpu)); | ||
43 | |||
44 | tp_event->perf_events = list; | ||
34 | 45 | ||
35 | if (!total_ref_count) { | 46 | if (!total_ref_count) { |
36 | char *buf; | 47 | char *buf; |
@@ -39,20 +50,20 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) | |||
39 | for (i = 0; i < 4; i++) { | 50 | for (i = 0; i < 4; i++) { |
40 | buf = (char *)alloc_percpu(perf_trace_t); | 51 | buf = (char *)alloc_percpu(perf_trace_t); |
41 | if (!buf) | 52 | if (!buf) |
42 | goto fail_buf; | 53 | goto fail; |
43 | 54 | ||
44 | rcu_assign_pointer(perf_trace_buf[i], buf); | 55 | perf_trace_buf[i] = buf; |
45 | } | 56 | } |
46 | } | 57 | } |
47 | 58 | ||
48 | ret = event->perf_event_enable(event); | 59 | ret = tp_event->perf_event_enable(tp_event); |
49 | if (!ret) { | 60 | if (ret) |
50 | event->perf_data = data; | 61 | goto fail; |
51 | total_ref_count++; | ||
52 | return 0; | ||
53 | } | ||
54 | 62 | ||
55 | fail_buf: | 63 | total_ref_count++; |
64 | return 0; | ||
65 | |||
66 | fail: | ||
56 | if (!total_ref_count) { | 67 | if (!total_ref_count) { |
57 | int i; | 68 | int i; |
58 | 69 | ||
@@ -61,21 +72,26 @@ fail_buf: | |||
61 | perf_trace_buf[i] = NULL; | 72 | perf_trace_buf[i] = NULL; |
62 | } | 73 | } |
63 | } | 74 | } |
64 | event->perf_refcount--; | 75 | |
76 | if (!--tp_event->perf_refcount) { | ||
77 | free_percpu(tp_event->perf_events); | ||
78 | tp_event->perf_events = NULL; | ||
79 | } | ||
65 | 80 | ||
66 | return ret; | 81 | return ret; |
67 | } | 82 | } |
68 | 83 | ||
69 | int perf_trace_enable(int event_id, void *data) | 84 | int perf_trace_init(struct perf_event *p_event) |
70 | { | 85 | { |
71 | struct ftrace_event_call *event; | 86 | struct ftrace_event_call *tp_event; |
87 | int event_id = p_event->attr.config; | ||
72 | int ret = -EINVAL; | 88 | int ret = -EINVAL; |
73 | 89 | ||
74 | mutex_lock(&event_mutex); | 90 | mutex_lock(&event_mutex); |
75 | list_for_each_entry(event, &ftrace_events, list) { | 91 | list_for_each_entry(tp_event, &ftrace_events, list) { |
76 | if (event->id == event_id && event->perf_event_enable && | 92 | if (tp_event->id == event_id && tp_event->perf_event_enable && |
77 | try_module_get(event->mod)) { | 93 | try_module_get(tp_event->mod)) { |
78 | ret = perf_trace_event_enable(event, data); | 94 | ret = perf_trace_event_init(tp_event, p_event); |
79 | break; | 95 | break; |
80 | } | 96 | } |
81 | } | 97 | } |
@@ -84,53 +100,52 @@ int perf_trace_enable(int event_id, void *data) | |||
84 | return ret; | 100 | return ret; |
85 | } | 101 | } |
86 | 102 | ||
87 | static void perf_trace_event_disable(struct ftrace_event_call *event) | 103 | int perf_trace_enable(struct perf_event *p_event) |
88 | { | 104 | { |
89 | if (--event->perf_refcount > 0) | 105 | struct ftrace_event_call *tp_event = p_event->tp_event; |
90 | return; | 106 | struct hlist_head *list; |
91 | 107 | ||
92 | event->perf_event_disable(event); | 108 | list = tp_event->perf_events; |
109 | if (WARN_ON_ONCE(!list)) | ||
110 | return -EINVAL; | ||
93 | 111 | ||
94 | if (!--total_ref_count) { | 112 | list = per_cpu_ptr(list, smp_processor_id()); |
95 | char *buf[4]; | 113 | hlist_add_head_rcu(&p_event->hlist_entry, list); |
96 | int i; | ||
97 | |||
98 | for (i = 0; i < 4; i++) { | ||
99 | buf[i] = perf_trace_buf[i]; | ||
100 | rcu_assign_pointer(perf_trace_buf[i], NULL); | ||
101 | } | ||
102 | 114 | ||
103 | /* | 115 | return 0; |
104 | * Ensure every events in profiling have finished before | 116 | } |
105 | * releasing the buffers | ||
106 | */ | ||
107 | synchronize_sched(); | ||
108 | 117 | ||
109 | for (i = 0; i < 4; i++) | 118 | void perf_trace_disable(struct perf_event *p_event) |
110 | free_percpu(buf[i]); | 119 | { |
111 | } | 120 | hlist_del_rcu(&p_event->hlist_entry); |
112 | } | 121 | } |
113 | 122 | ||
114 | void perf_trace_disable(int event_id) | 123 | void perf_trace_destroy(struct perf_event *p_event) |
115 | { | 124 | { |
116 | struct ftrace_event_call *event; | 125 | struct ftrace_event_call *tp_event = p_event->tp_event; |
126 | int i; | ||
117 | 127 | ||
118 | mutex_lock(&event_mutex); | 128 | if (--tp_event->perf_refcount > 0) |
119 | list_for_each_entry(event, &ftrace_events, list) { | 129 | return; |
120 | if (event->id == event_id) { | 130 | |
121 | perf_trace_event_disable(event); | 131 | tp_event->perf_event_disable(tp_event); |
122 | module_put(event->mod); | 132 | |
123 | break; | 133 | free_percpu(tp_event->perf_events); |
134 | tp_event->perf_events = NULL; | ||
135 | |||
136 | if (!--total_ref_count) { | ||
137 | for (i = 0; i < 4; i++) { | ||
138 | free_percpu(perf_trace_buf[i]); | ||
139 | perf_trace_buf[i] = NULL; | ||
124 | } | 140 | } |
125 | } | 141 | } |
126 | mutex_unlock(&event_mutex); | ||
127 | } | 142 | } |
128 | 143 | ||
129 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | 144 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, |
130 | struct pt_regs *regs, int *rctxp) | 145 | struct pt_regs *regs, int *rctxp) |
131 | { | 146 | { |
132 | struct trace_entry *entry; | 147 | struct trace_entry *entry; |
133 | char *trace_buf, *raw_data; | 148 | char *raw_data; |
134 | int pc; | 149 | int pc; |
135 | 150 | ||
136 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); | 151 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); |
@@ -139,13 +154,9 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
139 | 154 | ||
140 | *rctxp = perf_swevent_get_recursion_context(); | 155 | *rctxp = perf_swevent_get_recursion_context(); |
141 | if (*rctxp < 0) | 156 | if (*rctxp < 0) |
142 | goto err_recursion; | 157 | return NULL; |
143 | |||
144 | trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]); | ||
145 | if (!trace_buf) | ||
146 | goto err; | ||
147 | 158 | ||
148 | raw_data = per_cpu_ptr(trace_buf, smp_processor_id()); | 159 | raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id()); |
149 | 160 | ||
150 | /* zero the dead bytes from align to not leak stack to user */ | 161 | /* zero the dead bytes from align to not leak stack to user */ |
151 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); | 162 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); |
@@ -155,9 +166,5 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
155 | entry->type = type; | 166 | entry->type = type; |
156 | 167 | ||
157 | return raw_data; | 168 | return raw_data; |
158 | err: | ||
159 | perf_swevent_put_recursion_context(*rctxp); | ||
160 | err_recursion: | ||
161 | return NULL; | ||
162 | } | 169 | } |
163 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); | 170 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 20c96de0aea0..4681f60dac00 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1341,6 +1341,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
1341 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | 1341 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); |
1342 | struct ftrace_event_call *call = &tp->call; | 1342 | struct ftrace_event_call *call = &tp->call; |
1343 | struct kprobe_trace_entry_head *entry; | 1343 | struct kprobe_trace_entry_head *entry; |
1344 | struct hlist_head *head; | ||
1344 | u8 *data; | 1345 | u8 *data; |
1345 | int size, __size, i; | 1346 | int size, __size, i; |
1346 | int rctx; | 1347 | int rctx; |
@@ -1361,7 +1362,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
1361 | for (i = 0; i < tp->nr_args; i++) | 1362 | for (i = 0; i < tp->nr_args; i++) |
1362 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); | 1363 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); |
1363 | 1364 | ||
1364 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data); | 1365 | head = per_cpu_ptr(call->perf_events, smp_processor_id()); |
1366 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); | ||
1365 | } | 1367 | } |
1366 | 1368 | ||
1367 | /* Kretprobe profile handler */ | 1369 | /* Kretprobe profile handler */ |
@@ -1371,6 +1373,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1371 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | 1373 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); |
1372 | struct ftrace_event_call *call = &tp->call; | 1374 | struct ftrace_event_call *call = &tp->call; |
1373 | struct kretprobe_trace_entry_head *entry; | 1375 | struct kretprobe_trace_entry_head *entry; |
1376 | struct hlist_head *head; | ||
1374 | u8 *data; | 1377 | u8 *data; |
1375 | int size, __size, i; | 1378 | int size, __size, i; |
1376 | int rctx; | 1379 | int rctx; |
@@ -1392,8 +1395,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1392 | for (i = 0; i < tp->nr_args; i++) | 1395 | for (i = 0; i < tp->nr_args; i++) |
1393 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); | 1396 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); |
1394 | 1397 | ||
1395 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, | 1398 | head = per_cpu_ptr(call->perf_events, smp_processor_id()); |
1396 | regs, call->perf_data); | 1399 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); |
1397 | } | 1400 | } |
1398 | 1401 | ||
1399 | static int probe_perf_enable(struct ftrace_event_call *call) | 1402 | static int probe_perf_enable(struct ftrace_event_call *call) |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a657cefbb137..eb769f270291 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -438,6 +438,7 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) | |||
438 | { | 438 | { |
439 | struct syscall_metadata *sys_data; | 439 | struct syscall_metadata *sys_data; |
440 | struct syscall_trace_enter *rec; | 440 | struct syscall_trace_enter *rec; |
441 | struct hlist_head *head; | ||
441 | int syscall_nr; | 442 | int syscall_nr; |
442 | int rctx; | 443 | int rctx; |
443 | int size; | 444 | int size; |
@@ -467,8 +468,9 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) | |||
467 | rec->nr = syscall_nr; | 468 | rec->nr = syscall_nr; |
468 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 469 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, |
469 | (unsigned long *)&rec->args); | 470 | (unsigned long *)&rec->args); |
470 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, | 471 | |
471 | sys_data->enter_event->perf_data); | 472 | head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id()); |
473 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); | ||
472 | } | 474 | } |
473 | 475 | ||
474 | int perf_sysenter_enable(struct ftrace_event_call *call) | 476 | int perf_sysenter_enable(struct ftrace_event_call *call) |
@@ -510,6 +512,7 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) | |||
510 | { | 512 | { |
511 | struct syscall_metadata *sys_data; | 513 | struct syscall_metadata *sys_data; |
512 | struct syscall_trace_exit *rec; | 514 | struct syscall_trace_exit *rec; |
515 | struct hlist_head *head; | ||
513 | int syscall_nr; | 516 | int syscall_nr; |
514 | int rctx; | 517 | int rctx; |
515 | int size; | 518 | int size; |
@@ -542,8 +545,8 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) | |||
542 | rec->nr = syscall_nr; | 545 | rec->nr = syscall_nr; |
543 | rec->ret = syscall_get_return_value(current, regs); | 546 | rec->ret = syscall_get_return_value(current, regs); |
544 | 547 | ||
545 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, | 548 | head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id()); |
546 | sys_data->exit_event->perf_data); | 549 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); |
547 | } | 550 | } |
548 | 551 | ||
549 | int perf_sysexit_enable(struct ftrace_event_call *call) | 552 | int perf_sysexit_enable(struct ftrace_event_call *call) |