diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-05-19 08:02:22 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-05-21 05:37:56 -0400 |
commit | 1c024eca51fdc965290acf342ae16a476c2189d0 (patch) | |
tree | 28dc160cc70a20eeb8b8825d6d52ea88a6188413 | |
parent | b7e2ecef92d2e7785e6d76b41e5ba8bcbc45259d (diff) |
perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events
Avoid the swevent hash-table by using per-tracepoint,
per-cpu hlists.
Also, avoid conditionals on the fast path by ordering
against probe unregistration, so that the callback path
is never entered without its data being present.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.473188012@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/ftrace_event.h | 16 | ||||
-rw-r--r-- | include/linux/perf_event.h | 6 | ||||
-rw-r--r-- | include/trace/ftrace.h | 4 | ||||
-rw-r--r-- | kernel/perf_event.c | 94 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 127 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 9 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 11 |
7 files changed, 143 insertions, 124 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 126071bc90ab..7024b7d1126f 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
@@ -133,7 +133,7 @@ struct ftrace_event_call { | |||
133 | void *data; | 133 | void *data; |
134 | 134 | ||
135 | int perf_refcount; | 135 | int perf_refcount; |
136 | void *perf_data; | 136 | struct hlist_head *perf_events; |
137 | int (*perf_event_enable)(struct ftrace_event_call *); | 137 | int (*perf_event_enable)(struct ftrace_event_call *); |
138 | void (*perf_event_disable)(struct ftrace_event_call *); | 138 | void (*perf_event_disable)(struct ftrace_event_call *); |
139 | }; | 139 | }; |
@@ -192,9 +192,11 @@ struct perf_event; | |||
192 | 192 | ||
193 | DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); | 193 | DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); |
194 | 194 | ||
195 | extern int perf_trace_enable(int event_id, void *data); | 195 | extern int perf_trace_init(struct perf_event *event); |
196 | extern void perf_trace_disable(int event_id); | 196 | extern void perf_trace_destroy(struct perf_event *event); |
197 | extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, | 197 | extern int perf_trace_enable(struct perf_event *event); |
198 | extern void perf_trace_disable(struct perf_event *event); | ||
199 | extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, | ||
198 | char *filter_str); | 200 | char *filter_str); |
199 | extern void ftrace_profile_free_filter(struct perf_event *event); | 201 | extern void ftrace_profile_free_filter(struct perf_event *event); |
200 | extern void *perf_trace_buf_prepare(int size, unsigned short type, | 202 | extern void *perf_trace_buf_prepare(int size, unsigned short type, |
@@ -202,11 +204,9 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type, | |||
202 | 204 | ||
203 | static inline void | 205 | static inline void |
204 | perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, | 206 | perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, |
205 | u64 count, struct pt_regs *regs, void *event) | 207 | u64 count, struct pt_regs *regs, void *head) |
206 | { | 208 | { |
207 | struct trace_entry *entry = raw_data; | 209 | perf_tp_event(addr, count, raw_data, size, regs, head); |
208 | |||
209 | perf_tp_event(entry->type, addr, count, raw_data, size, regs, event); | ||
210 | perf_swevent_put_recursion_context(rctx); | 210 | perf_swevent_put_recursion_context(rctx); |
211 | } | 211 | } |
212 | #endif | 212 | #endif |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fe50347dc645..7cd7b356447d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -727,6 +727,7 @@ struct perf_event { | |||
727 | perf_overflow_handler_t overflow_handler; | 727 | perf_overflow_handler_t overflow_handler; |
728 | 728 | ||
729 | #ifdef CONFIG_EVENT_TRACING | 729 | #ifdef CONFIG_EVENT_TRACING |
730 | struct ftrace_event_call *tp_event; | ||
730 | struct event_filter *filter; | 731 | struct event_filter *filter; |
731 | #endif | 732 | #endif |
732 | 733 | ||
@@ -992,8 +993,9 @@ static inline bool perf_paranoid_kernel(void) | |||
992 | } | 993 | } |
993 | 994 | ||
994 | extern void perf_event_init(void); | 995 | extern void perf_event_init(void); |
995 | extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | 996 | extern void perf_tp_event(u64 addr, u64 count, void *record, |
996 | int entry_size, struct pt_regs *regs, void *event); | 997 | int entry_size, struct pt_regs *regs, |
998 | struct hlist_head *head); | ||
997 | extern void perf_bp_event(struct perf_event *event, void *data); | 999 | extern void perf_bp_event(struct perf_event *event, void *data); |
998 | 1000 | ||
999 | #ifndef perf_misc_flags | 1001 | #ifndef perf_misc_flags |
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index f282885057dd..4eb2148f1321 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
@@ -768,6 +768,7 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ | |||
768 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ | 768 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ |
769 | struct ftrace_raw_##call *entry; \ | 769 | struct ftrace_raw_##call *entry; \ |
770 | u64 __addr = 0, __count = 1; \ | 770 | u64 __addr = 0, __count = 1; \ |
771 | struct hlist_head *head; \ | ||
771 | int __entry_size; \ | 772 | int __entry_size; \ |
772 | int __data_size; \ | 773 | int __data_size; \ |
773 | int rctx; \ | 774 | int rctx; \ |
@@ -790,8 +791,9 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ | |||
790 | \ | 791 | \ |
791 | { assign; } \ | 792 | { assign; } \ |
792 | \ | 793 | \ |
794 | head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\ | ||
793 | perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ | 795 | perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ |
794 | __count, __regs, event_call->perf_data); \ | 796 | __count, __regs, head); \ |
795 | } | 797 | } |
796 | 798 | ||
797 | #undef DEFINE_EVENT | 799 | #undef DEFINE_EVENT |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 45b7aec55458..3f2cc313ee25 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -4005,9 +4005,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, | |||
4005 | perf_swevent_overflow(event, 0, nmi, data, regs); | 4005 | perf_swevent_overflow(event, 0, nmi, data, regs); |
4006 | } | 4006 | } |
4007 | 4007 | ||
4008 | static int perf_tp_event_match(struct perf_event *event, | ||
4009 | struct perf_sample_data *data); | ||
4010 | |||
4011 | static int perf_exclude_event(struct perf_event *event, | 4008 | static int perf_exclude_event(struct perf_event *event, |
4012 | struct pt_regs *regs) | 4009 | struct pt_regs *regs) |
4013 | { | 4010 | { |
@@ -4037,10 +4034,6 @@ static int perf_swevent_match(struct perf_event *event, | |||
4037 | if (perf_exclude_event(event, regs)) | 4034 | if (perf_exclude_event(event, regs)) |
4038 | return 0; | 4035 | return 0; |
4039 | 4036 | ||
4040 | if (event->attr.type == PERF_TYPE_TRACEPOINT && | ||
4041 | !perf_tp_event_match(event, data)) | ||
4042 | return 0; | ||
4043 | |||
4044 | return 1; | 4037 | return 1; |
4045 | } | 4038 | } |
4046 | 4039 | ||
@@ -4122,7 +4115,7 @@ end: | |||
4122 | 4115 | ||
4123 | int perf_swevent_get_recursion_context(void) | 4116 | int perf_swevent_get_recursion_context(void) |
4124 | { | 4117 | { |
4125 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | 4118 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
4126 | int rctx; | 4119 | int rctx; |
4127 | 4120 | ||
4128 | if (in_nmi()) | 4121 | if (in_nmi()) |
@@ -4134,10 +4127,8 @@ int perf_swevent_get_recursion_context(void) | |||
4134 | else | 4127 | else |
4135 | rctx = 0; | 4128 | rctx = 0; |
4136 | 4129 | ||
4137 | if (cpuctx->recursion[rctx]) { | 4130 | if (cpuctx->recursion[rctx]) |
4138 | put_cpu_var(perf_cpu_context); | ||
4139 | return -1; | 4131 | return -1; |
4140 | } | ||
4141 | 4132 | ||
4142 | cpuctx->recursion[rctx]++; | 4133 | cpuctx->recursion[rctx]++; |
4143 | barrier(); | 4134 | barrier(); |
@@ -4151,7 +4142,6 @@ void perf_swevent_put_recursion_context(int rctx) | |||
4151 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 4142 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
4152 | barrier(); | 4143 | barrier(); |
4153 | cpuctx->recursion[rctx]--; | 4144 | cpuctx->recursion[rctx]--; |
4154 | put_cpu_var(perf_cpu_context); | ||
4155 | } | 4145 | } |
4156 | EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); | 4146 | EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); |
4157 | 4147 | ||
@@ -4162,6 +4152,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi, | |||
4162 | struct perf_sample_data data; | 4152 | struct perf_sample_data data; |
4163 | int rctx; | 4153 | int rctx; |
4164 | 4154 | ||
4155 | preempt_disable_notrace(); | ||
4165 | rctx = perf_swevent_get_recursion_context(); | 4156 | rctx = perf_swevent_get_recursion_context(); |
4166 | if (rctx < 0) | 4157 | if (rctx < 0) |
4167 | return; | 4158 | return; |
@@ -4171,6 +4162,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi, | |||
4171 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); | 4162 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); |
4172 | 4163 | ||
4173 | perf_swevent_put_recursion_context(rctx); | 4164 | perf_swevent_put_recursion_context(rctx); |
4165 | preempt_enable_notrace(); | ||
4174 | } | 4166 | } |
4175 | 4167 | ||
4176 | static void perf_swevent_read(struct perf_event *event) | 4168 | static void perf_swevent_read(struct perf_event *event) |
@@ -4486,11 +4478,43 @@ static int swevent_hlist_get(struct perf_event *event) | |||
4486 | 4478 | ||
4487 | #ifdef CONFIG_EVENT_TRACING | 4479 | #ifdef CONFIG_EVENT_TRACING |
4488 | 4480 | ||
4489 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | 4481 | static const struct pmu perf_ops_tracepoint = { |
4490 | int entry_size, struct pt_regs *regs, void *event) | 4482 | .enable = perf_trace_enable, |
4483 | .disable = perf_trace_disable, | ||
4484 | .read = perf_swevent_read, | ||
4485 | .unthrottle = perf_swevent_unthrottle, | ||
4486 | }; | ||
4487 | |||
4488 | static int perf_tp_filter_match(struct perf_event *event, | ||
4489 | struct perf_sample_data *data) | ||
4490 | { | ||
4491 | void *record = data->raw->data; | ||
4492 | |||
4493 | if (likely(!event->filter) || filter_match_preds(event->filter, record)) | ||
4494 | return 1; | ||
4495 | return 0; | ||
4496 | } | ||
4497 | |||
4498 | static int perf_tp_event_match(struct perf_event *event, | ||
4499 | struct perf_sample_data *data, | ||
4500 | struct pt_regs *regs) | ||
4501 | { | ||
4502 | if (perf_exclude_event(event, regs)) | ||
4503 | return 0; | ||
4504 | |||
4505 | if (!perf_tp_filter_match(event, data)) | ||
4506 | return 0; | ||
4507 | |||
4508 | return 1; | ||
4509 | } | ||
4510 | |||
4511 | void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | ||
4512 | struct pt_regs *regs, struct hlist_head *head) | ||
4491 | { | 4513 | { |
4492 | const int type = PERF_TYPE_TRACEPOINT; | ||
4493 | struct perf_sample_data data; | 4514 | struct perf_sample_data data; |
4515 | struct perf_event *event; | ||
4516 | struct hlist_node *node; | ||
4517 | |||
4494 | struct perf_raw_record raw = { | 4518 | struct perf_raw_record raw = { |
4495 | .size = entry_size, | 4519 | .size = entry_size, |
4496 | .data = record, | 4520 | .data = record, |
@@ -4499,30 +4523,18 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | |||
4499 | perf_sample_data_init(&data, addr); | 4523 | perf_sample_data_init(&data, addr); |
4500 | data.raw = &raw; | 4524 | data.raw = &raw; |
4501 | 4525 | ||
4502 | if (!event) { | 4526 | rcu_read_lock(); |
4503 | do_perf_sw_event(type, event_id, count, 1, &data, regs); | 4527 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { |
4504 | return; | 4528 | if (perf_tp_event_match(event, &data, regs)) |
4529 | perf_swevent_add(event, count, 1, &data, regs); | ||
4505 | } | 4530 | } |
4506 | 4531 | rcu_read_unlock(); | |
4507 | if (perf_swevent_match(event, type, event_id, &data, regs)) | ||
4508 | perf_swevent_add(event, count, 1, &data, regs); | ||
4509 | } | 4532 | } |
4510 | EXPORT_SYMBOL_GPL(perf_tp_event); | 4533 | EXPORT_SYMBOL_GPL(perf_tp_event); |
4511 | 4534 | ||
4512 | static int perf_tp_event_match(struct perf_event *event, | ||
4513 | struct perf_sample_data *data) | ||
4514 | { | ||
4515 | void *record = data->raw->data; | ||
4516 | |||
4517 | if (likely(!event->filter) || filter_match_preds(event->filter, record)) | ||
4518 | return 1; | ||
4519 | return 0; | ||
4520 | } | ||
4521 | |||
4522 | static void tp_perf_event_destroy(struct perf_event *event) | 4535 | static void tp_perf_event_destroy(struct perf_event *event) |
4523 | { | 4536 | { |
4524 | perf_trace_disable(event->attr.config); | 4537 | perf_trace_destroy(event); |
4525 | swevent_hlist_put(event); | ||
4526 | } | 4538 | } |
4527 | 4539 | ||
4528 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4540 | static const struct pmu *tp_perf_event_init(struct perf_event *event) |
@@ -4538,17 +4550,13 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) | |||
4538 | !capable(CAP_SYS_ADMIN)) | 4550 | !capable(CAP_SYS_ADMIN)) |
4539 | return ERR_PTR(-EPERM); | 4551 | return ERR_PTR(-EPERM); |
4540 | 4552 | ||
4541 | if (perf_trace_enable(event->attr.config, event)) | 4553 | err = perf_trace_init(event); |
4554 | if (err) | ||
4542 | return NULL; | 4555 | return NULL; |
4543 | 4556 | ||
4544 | event->destroy = tp_perf_event_destroy; | 4557 | event->destroy = tp_perf_event_destroy; |
4545 | err = swevent_hlist_get(event); | ||
4546 | if (err) { | ||
4547 | perf_trace_disable(event->attr.config); | ||
4548 | return ERR_PTR(err); | ||
4549 | } | ||
4550 | 4558 | ||
4551 | return &perf_ops_generic; | 4559 | return &perf_ops_tracepoint; |
4552 | } | 4560 | } |
4553 | 4561 | ||
4554 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4562 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
@@ -4576,12 +4584,6 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4576 | 4584 | ||
4577 | #else | 4585 | #else |
4578 | 4586 | ||
4579 | static int perf_tp_event_match(struct perf_event *event, | ||
4580 | struct perf_sample_data *data) | ||
4581 | { | ||
4582 | return 1; | ||
4583 | } | ||
4584 | |||
4585 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4587 | static const struct pmu *tp_perf_event_init(struct perf_event *event) |
4586 | { | 4588 | { |
4587 | return NULL; | 4589 | return NULL; |
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a1304f8c4440..39d5ea7b0653 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -23,14 +23,25 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) | |||
23 | /* Count the events in use (per event id, not per instance) */ | 23 | /* Count the events in use (per event id, not per instance) */ |
24 | static int total_ref_count; | 24 | static int total_ref_count; |
25 | 25 | ||
26 | static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) | 26 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, |
27 | struct perf_event *p_event) | ||
27 | { | 28 | { |
29 | struct hlist_head *list; | ||
28 | int ret = -ENOMEM; | 30 | int ret = -ENOMEM; |
31 | int cpu; | ||
29 | 32 | ||
30 | if (event->perf_refcount++ > 0) { | 33 | p_event->tp_event = tp_event; |
31 | event->perf_data = NULL; | 34 | if (tp_event->perf_refcount++ > 0) |
32 | return 0; | 35 | return 0; |
33 | } | 36 | |
37 | list = alloc_percpu(struct hlist_head); | ||
38 | if (!list) | ||
39 | goto fail; | ||
40 | |||
41 | for_each_possible_cpu(cpu) | ||
42 | INIT_HLIST_HEAD(per_cpu_ptr(list, cpu)); | ||
43 | |||
44 | tp_event->perf_events = list; | ||
34 | 45 | ||
35 | if (!total_ref_count) { | 46 | if (!total_ref_count) { |
36 | char *buf; | 47 | char *buf; |
@@ -39,20 +50,20 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) | |||
39 | for (i = 0; i < 4; i++) { | 50 | for (i = 0; i < 4; i++) { |
40 | buf = (char *)alloc_percpu(perf_trace_t); | 51 | buf = (char *)alloc_percpu(perf_trace_t); |
41 | if (!buf) | 52 | if (!buf) |
42 | goto fail_buf; | 53 | goto fail; |
43 | 54 | ||
44 | rcu_assign_pointer(perf_trace_buf[i], buf); | 55 | perf_trace_buf[i] = buf; |
45 | } | 56 | } |
46 | } | 57 | } |
47 | 58 | ||
48 | ret = event->perf_event_enable(event); | 59 | ret = tp_event->perf_event_enable(tp_event); |
49 | if (!ret) { | 60 | if (ret) |
50 | event->perf_data = data; | 61 | goto fail; |
51 | total_ref_count++; | ||
52 | return 0; | ||
53 | } | ||
54 | 62 | ||
55 | fail_buf: | 63 | total_ref_count++; |
64 | return 0; | ||
65 | |||
66 | fail: | ||
56 | if (!total_ref_count) { | 67 | if (!total_ref_count) { |
57 | int i; | 68 | int i; |
58 | 69 | ||
@@ -61,21 +72,26 @@ fail_buf: | |||
61 | perf_trace_buf[i] = NULL; | 72 | perf_trace_buf[i] = NULL; |
62 | } | 73 | } |
63 | } | 74 | } |
64 | event->perf_refcount--; | 75 | |
76 | if (!--tp_event->perf_refcount) { | ||
77 | free_percpu(tp_event->perf_events); | ||
78 | tp_event->perf_events = NULL; | ||
79 | } | ||
65 | 80 | ||
66 | return ret; | 81 | return ret; |
67 | } | 82 | } |
68 | 83 | ||
69 | int perf_trace_enable(int event_id, void *data) | 84 | int perf_trace_init(struct perf_event *p_event) |
70 | { | 85 | { |
71 | struct ftrace_event_call *event; | 86 | struct ftrace_event_call *tp_event; |
87 | int event_id = p_event->attr.config; | ||
72 | int ret = -EINVAL; | 88 | int ret = -EINVAL; |
73 | 89 | ||
74 | mutex_lock(&event_mutex); | 90 | mutex_lock(&event_mutex); |
75 | list_for_each_entry(event, &ftrace_events, list) { | 91 | list_for_each_entry(tp_event, &ftrace_events, list) { |
76 | if (event->id == event_id && event->perf_event_enable && | 92 | if (tp_event->id == event_id && tp_event->perf_event_enable && |
77 | try_module_get(event->mod)) { | 93 | try_module_get(tp_event->mod)) { |
78 | ret = perf_trace_event_enable(event, data); | 94 | ret = perf_trace_event_init(tp_event, p_event); |
79 | break; | 95 | break; |
80 | } | 96 | } |
81 | } | 97 | } |
@@ -84,53 +100,52 @@ int perf_trace_enable(int event_id, void *data) | |||
84 | return ret; | 100 | return ret; |
85 | } | 101 | } |
86 | 102 | ||
87 | static void perf_trace_event_disable(struct ftrace_event_call *event) | 103 | int perf_trace_enable(struct perf_event *p_event) |
88 | { | 104 | { |
89 | if (--event->perf_refcount > 0) | 105 | struct ftrace_event_call *tp_event = p_event->tp_event; |
90 | return; | 106 | struct hlist_head *list; |
91 | 107 | ||
92 | event->perf_event_disable(event); | 108 | list = tp_event->perf_events; |
109 | if (WARN_ON_ONCE(!list)) | ||
110 | return -EINVAL; | ||
93 | 111 | ||
94 | if (!--total_ref_count) { | 112 | list = per_cpu_ptr(list, smp_processor_id()); |
95 | char *buf[4]; | 113 | hlist_add_head_rcu(&p_event->hlist_entry, list); |
96 | int i; | ||
97 | |||
98 | for (i = 0; i < 4; i++) { | ||
99 | buf[i] = perf_trace_buf[i]; | ||
100 | rcu_assign_pointer(perf_trace_buf[i], NULL); | ||
101 | } | ||
102 | 114 | ||
103 | /* | 115 | return 0; |
104 | * Ensure every events in profiling have finished before | 116 | } |
105 | * releasing the buffers | ||
106 | */ | ||
107 | synchronize_sched(); | ||
108 | 117 | ||
109 | for (i = 0; i < 4; i++) | 118 | void perf_trace_disable(struct perf_event *p_event) |
110 | free_percpu(buf[i]); | 119 | { |
111 | } | 120 | hlist_del_rcu(&p_event->hlist_entry); |
112 | } | 121 | } |
113 | 122 | ||
114 | void perf_trace_disable(int event_id) | 123 | void perf_trace_destroy(struct perf_event *p_event) |
115 | { | 124 | { |
116 | struct ftrace_event_call *event; | 125 | struct ftrace_event_call *tp_event = p_event->tp_event; |
126 | int i; | ||
117 | 127 | ||
118 | mutex_lock(&event_mutex); | 128 | if (--tp_event->perf_refcount > 0) |
119 | list_for_each_entry(event, &ftrace_events, list) { | 129 | return; |
120 | if (event->id == event_id) { | 130 | |
121 | perf_trace_event_disable(event); | 131 | tp_event->perf_event_disable(tp_event); |
122 | module_put(event->mod); | 132 | |
123 | break; | 133 | free_percpu(tp_event->perf_events); |
134 | tp_event->perf_events = NULL; | ||
135 | |||
136 | if (!--total_ref_count) { | ||
137 | for (i = 0; i < 4; i++) { | ||
138 | free_percpu(perf_trace_buf[i]); | ||
139 | perf_trace_buf[i] = NULL; | ||
124 | } | 140 | } |
125 | } | 141 | } |
126 | mutex_unlock(&event_mutex); | ||
127 | } | 142 | } |
128 | 143 | ||
129 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | 144 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, |
130 | struct pt_regs *regs, int *rctxp) | 145 | struct pt_regs *regs, int *rctxp) |
131 | { | 146 | { |
132 | struct trace_entry *entry; | 147 | struct trace_entry *entry; |
133 | char *trace_buf, *raw_data; | 148 | char *raw_data; |
134 | int pc; | 149 | int pc; |
135 | 150 | ||
136 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); | 151 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); |
@@ -139,13 +154,9 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
139 | 154 | ||
140 | *rctxp = perf_swevent_get_recursion_context(); | 155 | *rctxp = perf_swevent_get_recursion_context(); |
141 | if (*rctxp < 0) | 156 | if (*rctxp < 0) |
142 | goto err_recursion; | 157 | return NULL; |
143 | |||
144 | trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]); | ||
145 | if (!trace_buf) | ||
146 | goto err; | ||
147 | 158 | ||
148 | raw_data = per_cpu_ptr(trace_buf, smp_processor_id()); | 159 | raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id()); |
149 | 160 | ||
150 | /* zero the dead bytes from align to not leak stack to user */ | 161 | /* zero the dead bytes from align to not leak stack to user */ |
151 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); | 162 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); |
@@ -155,9 +166,5 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
155 | entry->type = type; | 166 | entry->type = type; |
156 | 167 | ||
157 | return raw_data; | 168 | return raw_data; |
158 | err: | ||
159 | perf_swevent_put_recursion_context(*rctxp); | ||
160 | err_recursion: | ||
161 | return NULL; | ||
162 | } | 169 | } |
163 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); | 170 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 20c96de0aea0..4681f60dac00 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1341,6 +1341,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
1341 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | 1341 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); |
1342 | struct ftrace_event_call *call = &tp->call; | 1342 | struct ftrace_event_call *call = &tp->call; |
1343 | struct kprobe_trace_entry_head *entry; | 1343 | struct kprobe_trace_entry_head *entry; |
1344 | struct hlist_head *head; | ||
1344 | u8 *data; | 1345 | u8 *data; |
1345 | int size, __size, i; | 1346 | int size, __size, i; |
1346 | int rctx; | 1347 | int rctx; |
@@ -1361,7 +1362,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
1361 | for (i = 0; i < tp->nr_args; i++) | 1362 | for (i = 0; i < tp->nr_args; i++) |
1362 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); | 1363 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); |
1363 | 1364 | ||
1364 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data); | 1365 | head = per_cpu_ptr(call->perf_events, smp_processor_id()); |
1366 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); | ||
1365 | } | 1367 | } |
1366 | 1368 | ||
1367 | /* Kretprobe profile handler */ | 1369 | /* Kretprobe profile handler */ |
@@ -1371,6 +1373,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1371 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | 1373 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); |
1372 | struct ftrace_event_call *call = &tp->call; | 1374 | struct ftrace_event_call *call = &tp->call; |
1373 | struct kretprobe_trace_entry_head *entry; | 1375 | struct kretprobe_trace_entry_head *entry; |
1376 | struct hlist_head *head; | ||
1374 | u8 *data; | 1377 | u8 *data; |
1375 | int size, __size, i; | 1378 | int size, __size, i; |
1376 | int rctx; | 1379 | int rctx; |
@@ -1392,8 +1395,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1392 | for (i = 0; i < tp->nr_args; i++) | 1395 | for (i = 0; i < tp->nr_args; i++) |
1393 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); | 1396 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); |
1394 | 1397 | ||
1395 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, | 1398 | head = per_cpu_ptr(call->perf_events, smp_processor_id()); |
1396 | regs, call->perf_data); | 1399 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); |
1397 | } | 1400 | } |
1398 | 1401 | ||
1399 | static int probe_perf_enable(struct ftrace_event_call *call) | 1402 | static int probe_perf_enable(struct ftrace_event_call *call) |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a657cefbb137..eb769f270291 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -438,6 +438,7 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) | |||
438 | { | 438 | { |
439 | struct syscall_metadata *sys_data; | 439 | struct syscall_metadata *sys_data; |
440 | struct syscall_trace_enter *rec; | 440 | struct syscall_trace_enter *rec; |
441 | struct hlist_head *head; | ||
441 | int syscall_nr; | 442 | int syscall_nr; |
442 | int rctx; | 443 | int rctx; |
443 | int size; | 444 | int size; |
@@ -467,8 +468,9 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) | |||
467 | rec->nr = syscall_nr; | 468 | rec->nr = syscall_nr; |
468 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 469 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, |
469 | (unsigned long *)&rec->args); | 470 | (unsigned long *)&rec->args); |
470 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, | 471 | |
471 | sys_data->enter_event->perf_data); | 472 | head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id()); |
473 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); | ||
472 | } | 474 | } |
473 | 475 | ||
474 | int perf_sysenter_enable(struct ftrace_event_call *call) | 476 | int perf_sysenter_enable(struct ftrace_event_call *call) |
@@ -510,6 +512,7 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) | |||
510 | { | 512 | { |
511 | struct syscall_metadata *sys_data; | 513 | struct syscall_metadata *sys_data; |
512 | struct syscall_trace_exit *rec; | 514 | struct syscall_trace_exit *rec; |
515 | struct hlist_head *head; | ||
513 | int syscall_nr; | 516 | int syscall_nr; |
514 | int rctx; | 517 | int rctx; |
515 | int size; | 518 | int size; |
@@ -542,8 +545,8 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) | |||
542 | rec->nr = syscall_nr; | 545 | rec->nr = syscall_nr; |
543 | rec->ret = syscall_get_return_value(current, regs); | 546 | rec->ret = syscall_get_return_value(current, regs); |
544 | 547 | ||
545 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, | 548 | head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id()); |
546 | sys_data->exit_event->perf_data); | 549 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); |
547 | } | 550 | } |
548 | 551 | ||
549 | int perf_sysexit_enable(struct ftrace_event_call *call) | 552 | int perf_sysexit_enable(struct ftrace_event_call *call) |