author		Frederic Weisbecker <fweisbec@gmail.com>	2009-08-06 19:25:54 -0400
committer	Ingo Molnar <mingo@elte.hu>			2009-08-09 06:53:48 -0400
commit		f413cdb80ce00ec1a4d0ab949b5d96c81cae7f75 (patch)
tree		08a9621cb1318f73a37faeed14c4e728408551ad /kernel
parent		3a6593050fbd8bbcaed3a44d01c31d907315c86c (diff)
perf_counter: Fix/complete ftrace event records sampling
This patch implements the kernel-side support for ftrace event
record sampling.

A new counter sampling attribute is added:

	PERF_SAMPLE_TP_RECORD

which requests sampling of ftrace event records. In this case,
if a PERF_TYPE_TRACEPOINT counter is active and a tracepoint
fires, we emit the tracepoint's binary record to the
perf counter event buffer, as a sample.
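Internally, the tracepoint callback hands the raw record to the output
path through perf_sample_data::private, wrapped in a small carrier
struct. The sketch below shows what that carrier amounts to; its real
definition lives in include/linux/perf_counter.h, which this
kernel/-limited view does not show, so treat the exact layout as an
assumption:

/* Carrier passed via perf_sample_data::private (sketch; the real
 * definition is in include/linux/perf_counter.h, outside this diff): */
struct perf_tracepoint_record {
	int	size;		/* size of the binary ftrace record, in bytes */
	char	*record;	/* the record itself, as ftrace built it */
};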
Result, after setting the PERF_SAMPLE_TP_RECORD attribute from perf
record:
perf record -f -F 1 -a -e workqueue:workqueue_execution
perf report -D
0x21e18 [0x48]: event: 9
.
. ... raw event: size 72 bytes
. 0000: 09 00 00 00 01 00 48 00 d0 c7 00 81 ff ff ff ff ......H........
. 0010: 0a 00 00 00 0a 00 00 00 21 00 00 00 00 00 00 00 ........!......
. 0020: 2b 00 01 02 0a 00 00 00 0a 00 00 00 65 76 65 6e +...........eve
. 0030: 74 73 2f 31 00 00 00 00 00 00 00 00 0a 00 00 00 ts/1...........
. 0040: e0 b1 31 81 ff ff ff ff .......
.
0x21e18 [0x48]: PERF_EVENT_SAMPLE (IP, 1): 10: 0xffffffff8100c7d0 period: 33
The raw ftrace binary record starts at offset 0020.
Translation:
struct trace_entry {
type = 0x2b = 43;
flags = 1;
preempt_count = 2;
pid = 0xa = 10;
tgid = 0xa = 10;
}
thread_comm = "events/1"
thread_pid = 0xa = 10;
func = 0xffffffff8131b1e0 = flush_to_ldisc()
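The decode follows mechanically from the record layout; the dump can be
overlaid on the structures below as a cross-check. struct trace_entry
matches the common ftrace header of this era, while the payload struct
and its field names are assumptions inferred from the bytes, not copied
from kernel source:

/* Common ftrace record header, the 12 bytes at sample offset 0x20: */
struct trace_entry {
	unsigned short	type;		/* 0x002b = 43 */
	unsigned char	flags;		/* 0x01 */
	unsigned char	preempt_count;	/* 0x02 */
	int		pid;		/* 0x0a = 10 */
	int		tgid;		/* 0x0a = 10 */
};

/* Whole record as dumped (40 bytes, sample offsets 0x20-0x48);
 * the payload field names below are assumptions: */
struct workqueue_execution_record {
	struct trace_entry	ent;			/* 12 bytes */
	char			thread_comm[16];	/* "events/1" */
	int			thread_pid;		/* 0x0a = 10 */
	void			*func;	/* 0xffffffff8131b1e0 = flush_to_ldisc() */
};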
What will come next?
- Userspace support ('perf trace'), 'flight data recorder' mode
for perf trace, etc.
- The unconditional copy from the profiling callback has a cost
  even when no such sampling has been requested; this needs to be
  fixed in the future. For that we need instant access to the perf
  counter attribute. This is a matter of adding a flag to struct
  ftrace_event.
- Take care of event recursion! Never try to record a lock event,
  for example: some locking is used in the profiling fast path,
  which leads to tracing recursion. That will be fixed using raw
  spinlocks or recursion protection.
- [...]
- Profit! :-)
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Gabriel Munteanu <eduard.munteanu@linux360.ro>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/perf_counter.c	18
-rw-r--r--	kernel/trace/trace.c	1
-rw-r--r--	kernel/trace/trace.h	4
3 files changed, 18 insertions, 5 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 52eb4b68d34f..868102172aa4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2646,6 +2646,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		u64			counter;
 	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
+	struct perf_tracepoint_record *tp;
 	int callchain_size = 0;
 	u64 time;
 	struct {
@@ -2714,6 +2715,11 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		header.size += sizeof(u64);
 	}
 
+	if (sample_type & PERF_SAMPLE_TP_RECORD) {
+		tp = data->private;
+		header.size += tp->size;
+	}
+
 	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
 	if (ret)
 		return;
@@ -2777,6 +2783,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		}
 	}
 
+	if (sample_type & PERF_SAMPLE_TP_RECORD)
+		perf_output_copy(&handle, tp->record, tp->size);
+
 	perf_output_end(&handle);
 }
 
@@ -3703,11 +3712,18 @@ static const struct pmu perf_ops_task_clock = {
 };
 
 #ifdef CONFIG_EVENT_PROFILE
-void perf_tpcounter_event(int event_id, u64 addr, u64 count)
+void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
+			  int entry_size)
 {
+	struct perf_tracepoint_record tp = {
+		.size = entry_size,
+		.record = record,
+	};
+
 	struct perf_sample_data data = {
 		.regs = get_irq_regs(),
 		.addr = addr,
+		.private = &tp,
 	};
 
 	if (!data.regs)
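With the new signature, a tracepoint's profiling callback builds the
binary record and passes it straight through. The real callers are the
generated handlers in include/trace/ftrace.h, which this kernel/-limited
diff does not show; the following is only a sketch of such a caller,
with hypothetical names:

/*
 * Sketch of a tracepoint profile callback feeding the new
 * perf_tpcounter_event() signature. my_event_profile() and the
 * entry layout are hypothetical; the generated handlers in
 * include/trace/ftrace.h do the real work.
 */
static void my_event_profile(int event_id, int arg)
{
	struct {
		struct trace_entry	ent;
		int			arg;	/* event payload */
	} entry;
	unsigned long irq_flags;

	local_save_flags(irq_flags);

	/* Fill the common header (now exported, see trace.c below). */
	tracing_generic_entry_update(&entry.ent, irq_flags, preempt_count());
	entry.ent.type = event_id;
	entry.arg = arg;

	/* The record is copied into the sample only if the active
	 * counter requested PERF_SAMPLE_TP_RECORD. */
	perf_tpcounter_event(event_id, 0, 1, &entry, sizeof(entry));
}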
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8930e39b9d8c..c22b40f8f576 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -848,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
 }
+EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
 						    int type,
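The export above is what lets record-building callers like the sketch
earlier run from modular tracepoint providers: the generated profile
handlers fill in the common trace_entry header via
tracing_generic_entry_update() before handing the record to
perf_tpcounter_event(). Matching that, its declaration leaves the
tracer-internal kernel/trace/trace.h below, presumably for a public
header outside this kernel/-limited view.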
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3548ae5cc780..8b9f4f6e9559 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
 					  int *ent_cpu, u64 *ent_ts);
 
-void tracing_generic_entry_update(struct trace_entry *entry,
-				  unsigned long flags,
-				  int pc);
-
 void default_wait_pipe(struct trace_iterator *iter);
 void poll_wait_pipe(struct trace_iterator *iter);
 