aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-08-06 19:25:54 -0400
committerIngo Molnar <mingo@elte.hu>2009-08-09 06:53:48 -0400
commitf413cdb80ce00ec1a4d0ab949b5d96c81cae7f75 (patch)
tree08a9621cb1318f73a37faeed14c4e728408551ad /include
parent3a6593050fbd8bbcaed3a44d01c31d907315c86c (diff)
perf_counter: Fix/complete ftrace event records sampling
This patch implements the kernel side support for ftrace event record sampling. A new counter sampling attribute is added: PERF_SAMPLE_TP_RECORD which requests ftrace events record sampling. In this case if a PERF_TYPE_TRACEPOINT counter is active and a tracepoint fires, we emit the tracepoint binary record to the perfcounter event buffer, as a sample. Result, after setting PERF_SAMPLE_TP_RECORD attribute from perf record: perf record -f -F 1 -a -e workqueue:workqueue_execution perf report -D 0x21e18 [0x48]: event: 9 . . ... raw event: size 72 bytes . 0000: 09 00 00 00 01 00 48 00 d0 c7 00 81 ff ff ff ff ......H........ . 0010: 0a 00 00 00 0a 00 00 00 21 00 00 00 00 00 00 00 ........!...... . 0020: 2b 00 01 02 0a 00 00 00 0a 00 00 00 65 76 65 6e +...........eve . 0030: 74 73 2f 31 00 00 00 00 00 00 00 00 0a 00 00 00 ts/1........... . 0040: e0 b1 31 81 ff ff ff ff ....... . 0x21e18 [0x48]: PERF_EVENT_SAMPLE (IP, 1): 10: 0xffffffff8100c7d0 period: 33 The raw ftrace binary record starts at offset 0020. Translation: struct trace_entry { type = 0x2b = 43; flags = 1; preempt_count = 2; pid = 0xa = 10; tgid = 0xa = 10; } thread_comm = "events/1" thread_pid = 0xa = 10; func = 0xffffffff8131b1e0 = flush_to_ldisc() What will come next? - Userspace support ('perf trace'), 'flight data recorder' mode for perf trace, etc. - The unconditional copy from the profiling callback brings some costs however if someone wants no such sampling to occur, and needs to be fixed in the future. For that we need to have an instant access to the perf counter attribute. This is a matter of a flag to add in the struct ftrace_event. - Take care of the events recursivity! Don't ever try to record a lock event for example, it seems some locking is used in the profiling fast path and lead to a tracing recursivity. That will be fixed using raw spinlock or recursivity protection. - [...] - Profit! :-) Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Gabriel Munteanu <eduard.munteanu@linux360.ro> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include')
-rw-r--r--include/linux/ftrace_event.h4
-rw-r--r--include/linux/perf_counter.h9
-rw-r--r--include/trace/ftrace.h130
3 files changed, 107 insertions, 36 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d7cd193c2277..a81170de7f6b 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -89,7 +89,9 @@ enum print_line_t {
89 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ 89 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
90}; 90};
91 91
92 92void tracing_generic_entry_update(struct trace_entry *entry,
93 unsigned long flags,
94 int pc);
93struct ring_buffer_event * 95struct ring_buffer_event *
94trace_current_buffer_lock_reserve(int type, unsigned long len, 96trace_current_buffer_lock_reserve(int type, unsigned long len,
95 unsigned long flags, int pc); 97 unsigned long flags, int pc);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e604e6ef72dd..a67dd5c5b6d3 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -121,8 +121,9 @@ enum perf_counter_sample_format {
121 PERF_SAMPLE_CPU = 1U << 7, 121 PERF_SAMPLE_CPU = 1U << 7,
122 PERF_SAMPLE_PERIOD = 1U << 8, 122 PERF_SAMPLE_PERIOD = 1U << 8,
123 PERF_SAMPLE_STREAM_ID = 1U << 9, 123 PERF_SAMPLE_STREAM_ID = 1U << 9,
124 PERF_SAMPLE_TP_RECORD = 1U << 10,
124 125
125 PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */ 126 PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */
126}; 127};
127 128
128/* 129/*
@@ -413,6 +414,11 @@ struct perf_callchain_entry {
413 __u64 ip[PERF_MAX_STACK_DEPTH]; 414 __u64 ip[PERF_MAX_STACK_DEPTH];
414}; 415};
415 416
417struct perf_tracepoint_record {
418 int size;
419 char *record;
420};
421
416struct task_struct; 422struct task_struct;
417 423
418/** 424/**
@@ -681,6 +687,7 @@ struct perf_sample_data {
681 struct pt_regs *regs; 687 struct pt_regs *regs;
682 u64 addr; 688 u64 addr;
683 u64 period; 689 u64 period;
690 void *private;
684}; 691};
685 692
686extern int perf_counter_overflow(struct perf_counter *counter, int nmi, 693extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index fec71f8dbc48..7fb16d90e7b1 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -353,15 +353,7 @@ static inline int ftrace_get_offsets_##call( \
353/* 353/*
354 * Generate the functions needed for tracepoint perf_counter support. 354 * Generate the functions needed for tracepoint perf_counter support.
355 * 355 *
356 * static void ftrace_profile_<call>(proto) 356 * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
357 * {
358 * extern void perf_tpcounter_event(int, u64, u64);
359 * u64 __addr = 0, __count = 1;
360 *
361 * <assign> <-- here we expand the TP_perf_assign() macro
362 *
363 * perf_tpcounter_event(event_<call>.id, __addr, __count);
364 * }
365 * 357 *
366 * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call) 358 * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call)
367 * { 359 * {
@@ -381,28 +373,10 @@ static inline int ftrace_get_offsets_##call( \
381 * 373 *
382 */ 374 */
383 375
384#undef TP_fast_assign
385#define TP_fast_assign(args...)
386
387#undef TP_perf_assign
388#define TP_perf_assign(args...) args
389
390#undef __perf_addr
391#define __perf_addr(a) __addr = (a)
392
393#undef __perf_count
394#define __perf_count(c) __count = (c)
395
396#undef TRACE_EVENT 376#undef TRACE_EVENT
397#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ 377#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
398 \ 378 \
399static void ftrace_profile_##call(proto) \ 379static void ftrace_profile_##call(proto); \
400{ \
401 extern void perf_tpcounter_event(int, u64, u64); \
402 u64 __addr = 0, __count = 1; \
403 { assign; } \
404 perf_tpcounter_event(event_##call.id, __addr, __count); \
405} \
406 \ 380 \
407static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ 381static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
408{ \ 382{ \
@@ -422,12 +396,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
422 396
423#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 397#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
424 398
425#undef TP_fast_assign
426#define TP_fast_assign(args...) args
427
428#undef TP_perf_assign
429#define TP_perf_assign(args...)
430
431#endif 399#endif
432 400
433/* 401/*
@@ -647,5 +615,99 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
647 615
648#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 616#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
649 617
618/*
619 * Define the insertion callback to profile events
620 *
621 * The job is very similar to ftrace_raw_event_<call> except that we don't
622 * insert in the ring buffer but in a perf counter.
623 *
624 * static void ftrace_profile_<call>(proto)
625 * {
626 * struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
627 * struct ftrace_event_call *event_call = &event_<call>;
628 * extern void perf_tpcounter_event(int, u64, u64, void *, int);
629 * struct ftrace_raw_##call *entry;
630 * u64 __addr = 0, __count = 1;
631 * unsigned long irq_flags;
632 * int __entry_size;
633 * int __data_size;
634 * int pc;
635 *
636 * local_save_flags(irq_flags);
637 * pc = preempt_count();
638 *
639 * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
640 * __entry_size = __data_size + sizeof(*entry);
641 *
642 * do {
643 * char raw_data[__entry_size]; <- allocate our sample in the stack
644 * struct trace_entry *ent;
645 *
646 * entry = (struct ftrace_raw_<call> *)raw_data;
647 * ent = &entry->ent;
648 * tracing_generic_entry_update(ent, irq_flags, pc);
649 * ent->type = event_call->id;
650 *
651 * <tstruct> <- do some jobs with dynamic arrays
652 *
653 * <assign> <- affect our values
654 *
655 * perf_tpcounter_event(event_call->id, __addr, __count, entry,
656 * __entry_size); <- submit them to perf counter
657 * } while (0);
658 *
659 * }
660 */
661
662#ifdef CONFIG_EVENT_PROFILE
663
664#undef __perf_addr
665#define __perf_addr(a) __addr = (a)
666
667#undef __perf_count
668#define __perf_count(c) __count = (c)
669
670#undef TRACE_EVENT
671#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
672static void ftrace_profile_##call(proto) \
673{ \
674 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
675 struct ftrace_event_call *event_call = &event_##call; \
676 extern void perf_tpcounter_event(int, u64, u64, void *, int); \
677 struct ftrace_raw_##call *entry; \
678 u64 __addr = 0, __count = 1; \
679 unsigned long irq_flags; \
680 int __entry_size; \
681 int __data_size; \
682 int pc; \
683 \
684 local_save_flags(irq_flags); \
685 pc = preempt_count(); \
686 \
687 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
688 __entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\
689 \
690 do { \
691 char raw_data[__entry_size]; \
692 struct trace_entry *ent; \
693 \
694 entry = (struct ftrace_raw_##call *)raw_data; \
695 ent = &entry->ent; \
696 tracing_generic_entry_update(ent, irq_flags, pc); \
697 ent->type = event_call->id; \
698 \
699 tstruct \
700 \
701 { assign; } \
702 \
703 perf_tpcounter_event(event_call->id, __addr, __count, entry,\
704 __entry_size); \
705 } while (0); \
706 \
707}
708
709#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
710#endif /* CONFIG_EVENT_PROFILE */
711
650#undef _TRACE_PROFILE_INIT 712#undef _TRACE_PROFILE_INIT
651 713