diff options
| author | Andrew Vagin <avagin@openvz.org> | 2012-07-11 10:14:58 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2012-07-31 11:02:05 -0400 |
| commit | e6dab5ffab59e910ec0e3355f4a6f29f7a7be474 (patch) | |
| tree | 87acf0fb071b8d09794ac7d834cb256de030cceb | |
| parent | d07bdfd322d307789f15b427dbcc39257665356f (diff) | |
perf/trace: Add ability to set a target task for events
A few events are interesting not only for the current task.
For example, sched_stat_* events are interesting for the task
which wakes up. For this reason, it would be good if such
events were delivered to the target task too.
Now a target task can be set by using __perf_task().
The original idea and a draft patch belong to Peter Zijlstra.
I need these events for profiling sleep times. sched_switch is used for
getting callchains and sched_stat_* is used for getting time periods.
These events are combined in user space, and the result can then be
analyzed by perf tools.
Inspired-by: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arun Sharma <asharma@fb.com>
Signed-off-by: Andrew Vagin <avagin@openvz.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1342016098-213063-1-git-send-email-avagin@openvz.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
| -rw-r--r-- | include/linux/ftrace_event.h | 5 | ||||
| -rw-r--r-- | include/linux/perf_event.h | 3 | ||||
| -rw-r--r-- | include/trace/events/sched.h | 4 | ||||
| -rw-r--r-- | include/trace/ftrace.h | 6 | ||||
| -rw-r--r-- | kernel/events/callchain.c | 9 | ||||
| -rw-r--r-- | kernel/events/core.c | 30 | ||||
| -rw-r--r-- | kernel/events/internal.h | 3 | ||||
| -rw-r--r-- | kernel/trace/trace_event_perf.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace_kprobe.c | 6 | ||||
| -rw-r--r-- | kernel/trace/trace_syscalls.c | 4 | ||||
| -rw-r--r-- | kernel/trace/trace_uprobe.c | 2 |
11 files changed, 60 insertions, 14 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index af961d6f7ab1..642928cf57b4 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
| @@ -306,9 +306,10 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type, | |||
| 306 | 306 | ||
| 307 | static inline void | 307 | static inline void |
| 308 | perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, | 308 | perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, |
| 309 | u64 count, struct pt_regs *regs, void *head) | 309 | u64 count, struct pt_regs *regs, void *head, |
| 310 | struct task_struct *task) | ||
| 310 | { | 311 | { |
| 311 | perf_tp_event(addr, count, raw_data, size, regs, head, rctx); | 312 | perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task); |
| 312 | } | 313 | } |
| 313 | #endif | 314 | #endif |
| 314 | 315 | ||
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 76c5c8b724a7..7602ccb3f40e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
| @@ -1272,7 +1272,8 @@ static inline bool perf_paranoid_kernel(void) | |||
| 1272 | extern void perf_event_init(void); | 1272 | extern void perf_event_init(void); |
| 1273 | extern void perf_tp_event(u64 addr, u64 count, void *record, | 1273 | extern void perf_tp_event(u64 addr, u64 count, void *record, |
| 1274 | int entry_size, struct pt_regs *regs, | 1274 | int entry_size, struct pt_regs *regs, |
| 1275 | struct hlist_head *head, int rctx); | 1275 | struct hlist_head *head, int rctx, |
| 1276 | struct task_struct *task); | ||
| 1276 | extern void perf_bp_event(struct perf_event *event, void *data); | 1277 | extern void perf_bp_event(struct perf_event *event, void *data); |
| 1277 | 1278 | ||
| 1278 | #ifndef perf_misc_flags | 1279 | #ifndef perf_misc_flags |
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index ea7a2035456d..5a8671e8a67f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h | |||
| @@ -73,6 +73,9 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, | |||
| 73 | __entry->prio = p->prio; | 73 | __entry->prio = p->prio; |
| 74 | __entry->success = success; | 74 | __entry->success = success; |
| 75 | __entry->target_cpu = task_cpu(p); | 75 | __entry->target_cpu = task_cpu(p); |
| 76 | ) | ||
| 77 | TP_perf_assign( | ||
| 78 | __perf_task(p); | ||
| 76 | ), | 79 | ), |
| 77 | 80 | ||
| 78 | TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", | 81 | TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", |
| @@ -325,6 +328,7 @@ DECLARE_EVENT_CLASS(sched_stat_template, | |||
| 325 | ) | 328 | ) |
| 326 | TP_perf_assign( | 329 | TP_perf_assign( |
| 327 | __perf_count(delay); | 330 | __perf_count(delay); |
| 331 | __perf_task(tsk); | ||
| 328 | ), | 332 | ), |
| 329 | 333 | ||
| 330 | TP_printk("comm=%s pid=%d delay=%Lu [ns]", | 334 | TP_printk("comm=%s pid=%d delay=%Lu [ns]", |
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c6bc2faaf261..a763888a36f9 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
| @@ -712,6 +712,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call | |||
| 712 | #undef __perf_count | 712 | #undef __perf_count |
| 713 | #define __perf_count(c) __count = (c) | 713 | #define __perf_count(c) __count = (c) |
| 714 | 714 | ||
| 715 | #undef __perf_task | ||
| 716 | #define __perf_task(t) __task = (t) | ||
| 717 | |||
| 715 | #undef TP_perf_assign | 718 | #undef TP_perf_assign |
| 716 | #define TP_perf_assign(args...) args | 719 | #define TP_perf_assign(args...) args |
| 717 | 720 | ||
| @@ -725,6 +728,7 @@ perf_trace_##call(void *__data, proto) \ | |||
| 725 | struct ftrace_raw_##call *entry; \ | 728 | struct ftrace_raw_##call *entry; \ |
| 726 | struct pt_regs __regs; \ | 729 | struct pt_regs __regs; \ |
| 727 | u64 __addr = 0, __count = 1; \ | 730 | u64 __addr = 0, __count = 1; \ |
| 731 | struct task_struct *__task = NULL; \ | ||
| 728 | struct hlist_head *head; \ | 732 | struct hlist_head *head; \ |
| 729 | int __entry_size; \ | 733 | int __entry_size; \ |
| 730 | int __data_size; \ | 734 | int __data_size; \ |
| @@ -752,7 +756,7 @@ perf_trace_##call(void *__data, proto) \ | |||
| 752 | \ | 756 | \ |
| 753 | head = this_cpu_ptr(event_call->perf_events); \ | 757 | head = this_cpu_ptr(event_call->perf_events); \ |
| 754 | perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ | 758 | perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ |
| 755 | __count, &__regs, head); \ | 759 | __count, &__regs, head, __task); \ |
| 756 | } | 760 | } |
| 757 | 761 | ||
| 758 | /* | 762 | /* |
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 6581a040f399..98d4597f43d6 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c | |||
| @@ -153,7 +153,8 @@ put_callchain_entry(int rctx) | |||
| 153 | put_recursion_context(__get_cpu_var(callchain_recursion), rctx); | 153 | put_recursion_context(__get_cpu_var(callchain_recursion), rctx); |
| 154 | } | 154 | } |
| 155 | 155 | ||
| 156 | struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | 156 | struct perf_callchain_entry * |
| 157 | perf_callchain(struct perf_event *event, struct pt_regs *regs) | ||
| 157 | { | 158 | { |
| 158 | int rctx; | 159 | int rctx; |
| 159 | struct perf_callchain_entry *entry; | 160 | struct perf_callchain_entry *entry; |
| @@ -178,6 +179,12 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
| 178 | } | 179 | } |
| 179 | 180 | ||
| 180 | if (regs) { | 181 | if (regs) { |
| 182 | /* | ||
| 183 | * Disallow cross-task user callchains. | ||
| 184 | */ | ||
| 185 | if (event->ctx->task && event->ctx->task != current) | ||
| 186 | goto exit_put; | ||
| 187 | |||
| 181 | perf_callchain_store(entry, PERF_CONTEXT_USER); | 188 | perf_callchain_store(entry, PERF_CONTEXT_USER); |
| 182 | perf_callchain_user(entry, regs); | 189 | perf_callchain_user(entry, regs); |
| 183 | } | 190 | } |
diff --git a/kernel/events/core.c b/kernel/events/core.c index f1cf0edeb39a..b7935fcec7d9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -4039,7 +4039,7 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
| 4039 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { | 4039 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { |
| 4040 | int size = 1; | 4040 | int size = 1; |
| 4041 | 4041 | ||
| 4042 | data->callchain = perf_callchain(regs); | 4042 | data->callchain = perf_callchain(event, regs); |
| 4043 | 4043 | ||
| 4044 | if (data->callchain) | 4044 | if (data->callchain) |
| 4045 | size += data->callchain->nr; | 4045 | size += data->callchain->nr; |
| @@ -5209,7 +5209,8 @@ static int perf_tp_event_match(struct perf_event *event, | |||
| 5209 | } | 5209 | } |
| 5210 | 5210 | ||
| 5211 | void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | 5211 | void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, |
| 5212 | struct pt_regs *regs, struct hlist_head *head, int rctx) | 5212 | struct pt_regs *regs, struct hlist_head *head, int rctx, |
| 5213 | struct task_struct *task) | ||
| 5213 | { | 5214 | { |
| 5214 | struct perf_sample_data data; | 5215 | struct perf_sample_data data; |
| 5215 | struct perf_event *event; | 5216 | struct perf_event *event; |
| @@ -5228,6 +5229,31 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | |||
| 5228 | perf_swevent_event(event, count, &data, regs); | 5229 | perf_swevent_event(event, count, &data, regs); |
| 5229 | } | 5230 | } |
| 5230 | 5231 | ||
| 5232 | /* | ||
| 5233 | * If we got specified a target task, also iterate its context and | ||
| 5234 | * deliver this event there too. | ||
| 5235 | */ | ||
| 5236 | if (task && task != current) { | ||
| 5237 | struct perf_event_context *ctx; | ||
| 5238 | struct trace_entry *entry = record; | ||
| 5239 | |||
| 5240 | rcu_read_lock(); | ||
| 5241 | ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]); | ||
| 5242 | if (!ctx) | ||
| 5243 | goto unlock; | ||
| 5244 | |||
| 5245 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | ||
| 5246 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | ||
| 5247 | continue; | ||
| 5248 | if (event->attr.config != entry->type) | ||
| 5249 | continue; | ||
| 5250 | if (perf_tp_event_match(event, &data, regs)) | ||
| 5251 | perf_swevent_event(event, count, &data, regs); | ||
| 5252 | } | ||
| 5253 | unlock: | ||
| 5254 | rcu_read_unlock(); | ||
| 5255 | } | ||
| 5256 | |||
| 5231 | perf_swevent_put_recursion_context(rctx); | 5257 | perf_swevent_put_recursion_context(rctx); |
| 5232 | } | 5258 | } |
| 5233 | EXPORT_SYMBOL_GPL(perf_tp_event); | 5259 | EXPORT_SYMBOL_GPL(perf_tp_event); |
diff --git a/kernel/events/internal.h b/kernel/events/internal.h index b0b107f90afc..a096c19f2c2a 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h | |||
| @@ -101,7 +101,8 @@ __output_copy(struct perf_output_handle *handle, | |||
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | /* Callchain handling */ | 103 | /* Callchain handling */ |
| 104 | extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); | 104 | extern struct perf_callchain_entry * |
| 105 | perf_callchain(struct perf_event *event, struct pt_regs *regs); | ||
| 105 | extern int get_callchain_buffers(void); | 106 | extern int get_callchain_buffers(void); |
| 106 | extern void put_callchain_buffers(void); | 107 | extern void put_callchain_buffers(void); |
| 107 | 108 | ||
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index fee3752ae8f6..8a6d2ee2086c 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
| @@ -281,7 +281,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) | |||
| 281 | 281 | ||
| 282 | head = this_cpu_ptr(event_function.perf_events); | 282 | head = this_cpu_ptr(event_function.perf_events); |
| 283 | perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, | 283 | perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, |
| 284 | 1, &regs, head); | 284 | 1, &regs, head, NULL); |
| 285 | 285 | ||
| 286 | #undef ENTRY_SIZE | 286 | #undef ENTRY_SIZE |
| 287 | } | 287 | } |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b31d3d5699fe..1a2117043bb1 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
| @@ -1002,7 +1002,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
| 1002 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); | 1002 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); |
| 1003 | 1003 | ||
| 1004 | head = this_cpu_ptr(call->perf_events); | 1004 | head = this_cpu_ptr(call->perf_events); |
| 1005 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); | 1005 | perf_trace_buf_submit(entry, size, rctx, |
| 1006 | entry->ip, 1, regs, head, NULL); | ||
| 1006 | } | 1007 | } |
| 1007 | 1008 | ||
| 1008 | /* Kretprobe profile handler */ | 1009 | /* Kretprobe profile handler */ |
| @@ -1033,7 +1034,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
| 1033 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); | 1034 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); |
| 1034 | 1035 | ||
| 1035 | head = this_cpu_ptr(call->perf_events); | 1036 | head = this_cpu_ptr(call->perf_events); |
| 1036 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); | 1037 | perf_trace_buf_submit(entry, size, rctx, |
| 1038 | entry->ret_ip, 1, regs, head, NULL); | ||
| 1037 | } | 1039 | } |
| 1038 | #endif /* CONFIG_PERF_EVENTS */ | 1040 | #endif /* CONFIG_PERF_EVENTS */ |
| 1039 | 1041 | ||
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 96fc73369099..60e4d7875672 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
| @@ -532,7 +532,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
| 532 | (unsigned long *)&rec->args); | 532 | (unsigned long *)&rec->args); |
| 533 | 533 | ||
| 534 | head = this_cpu_ptr(sys_data->enter_event->perf_events); | 534 | head = this_cpu_ptr(sys_data->enter_event->perf_events); |
| 535 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); | 535 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); |
| 536 | } | 536 | } |
| 537 | 537 | ||
| 538 | int perf_sysenter_enable(struct ftrace_event_call *call) | 538 | int perf_sysenter_enable(struct ftrace_event_call *call) |
| @@ -608,7 +608,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
| 608 | rec->ret = syscall_get_return_value(current, regs); | 608 | rec->ret = syscall_get_return_value(current, regs); |
| 609 | 609 | ||
| 610 | head = this_cpu_ptr(sys_data->exit_event->perf_events); | 610 | head = this_cpu_ptr(sys_data->exit_event->perf_events); |
| 611 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); | 611 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); |
| 612 | } | 612 | } |
| 613 | 613 | ||
| 614 | int perf_sysexit_enable(struct ftrace_event_call *call) | 614 | int perf_sysexit_enable(struct ftrace_event_call *call) |
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 2b36ac68549e..03003cd7dd96 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
| @@ -670,7 +670,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) | |||
| 670 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); | 670 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); |
| 671 | 671 | ||
| 672 | head = this_cpu_ptr(call->perf_events); | 672 | head = this_cpu_ptr(call->perf_events); |
| 673 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); | 673 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL); |
| 674 | 674 | ||
| 675 | out: | 675 | out: |
| 676 | preempt_enable(); | 676 | preempt_enable(); |
