aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2009-02-27 19:12:30 -0500
committerSteven Rostedt <srostedt@redhat.com>2009-02-28 03:09:32 -0500
commitc32e827b25054cb17b79cf97fb5e63ae4ce2223c (patch)
tree9a79a8662d88557303a671d147f80038b5fd87dc
parentef5580d0fffce6e0a01043bac0625128b5d409a7 (diff)
tracing: add raw trace point recording infrastructure
Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
-rw-r--r--kernel/trace/events.c6
-rw-r--r--kernel/trace/trace.h19
-rw-r--r--kernel/trace/trace_events.c2
-rw-r--r--kernel/trace/trace_events.h57
-rw-r--r--kernel/trace/trace_events_stage_1.h34
-rw-r--r--kernel/trace/trace_events_stage_2.h72
-rw-r--r--kernel/trace/trace_events_stage_3.h219
7 files changed, 350 insertions, 59 deletions
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
index 4e4e45860c58..f2509cbaacea 100644
--- a/kernel/trace/events.c
+++ b/kernel/trace/events.c
@@ -8,6 +8,10 @@
8 8
9#include <trace/trace_events.h> 9#include <trace/trace_events.h>
10 10
11#include "trace_events.h" 11#include "trace_output.h"
12
13#include "trace_events_stage_1.h"
14#include "trace_events_stage_2.h"
15#include "trace_events_stage_3.h"
12 16
13#include <trace/trace_event_types.h> 17#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index adf161f6dd11..aa1ab0cb80ab 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -726,4 +726,23 @@ static inline void trace_branch_disable(void)
726} 726}
727#endif /* CONFIG_BRANCH_TRACER */ 727#endif /* CONFIG_BRANCH_TRACER */
728 728
729struct ftrace_event_call {
730 char *name;
731 char *system;
732 struct dentry *dir;
733 int enabled;
734 int (*regfunc)(void);
735 void (*unregfunc)(void);
736 int id;
737 struct dentry *raw_dir;
738 int raw_enabled;
739 int (*raw_init)(void);
740 int (*raw_reg)(void);
741 void (*raw_unreg)(void);
742};
743
744void event_trace_printk(unsigned long ip, const char *fmt, ...);
745extern struct ftrace_event_call __start_ftrace_events[];
746extern struct ftrace_event_call __stop_ftrace_events[];
747
729#endif /* _LINUX_KERNEL_TRACE_H */ 748#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index b811eb343522..77a5c02bd634 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -10,7 +10,7 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/ctype.h> 11#include <linux/ctype.h>
12 12
13#include "trace_events.h" 13#include "trace.h"
14 14
15#define TRACE_SYSTEM "TRACE_SYSTEM" 15#define TRACE_SYSTEM "TRACE_SYSTEM"
16 16
diff --git a/kernel/trace/trace_events.h b/kernel/trace/trace_events.h
deleted file mode 100644
index b015d7b19878..000000000000
--- a/kernel/trace/trace_events.h
+++ /dev/null
@@ -1,57 +0,0 @@
1#ifndef _LINUX_KERNEL_TRACE_EVENTS_H
2#define _LINUX_KERNEL_TRACE_EVENTS_H
3
4#include <linux/debugfs.h>
5#include <linux/ftrace.h>
6#include "trace.h"
7
8struct ftrace_event_call {
9 char *name;
10 char *system;
11 struct dentry *dir;
12 int enabled;
13 int (*regfunc)(void);
14 void (*unregfunc)(void);
15};
16
17
18#undef TPFMT
19#define TPFMT(fmt, args...) fmt "\n", ##args
20
21#undef TRACE_FORMAT
22#define TRACE_FORMAT(call, proto, args, fmt) \
23static void ftrace_event_##call(proto) \
24{ \
25 event_trace_printk(_RET_IP_, "(" #call ") " fmt); \
26} \
27 \
28static int ftrace_reg_event_##call(void) \
29{ \
30 int ret; \
31 \
32 ret = register_trace_##call(ftrace_event_##call); \
33 if (!ret) \
34 pr_info("event trace: Could not activate trace point " \
35 "probe to " #call); \
36 return ret; \
37} \
38 \
39static void ftrace_unreg_event_##call(void) \
40{ \
41 unregister_trace_##call(ftrace_event_##call); \
42} \
43 \
44static struct ftrace_event_call __used \
45__attribute__((__aligned__(4))) \
46__attribute__((section("_ftrace_events"))) event_##call = { \
47 .name = #call, \
48 .system = STR(TRACE_SYSTEM), \
49 .regfunc = ftrace_reg_event_##call, \
50 .unregfunc = ftrace_unreg_event_##call, \
51}
52
53void event_trace_printk(unsigned long ip, const char *fmt, ...);
54extern struct ftrace_event_call __start_ftrace_events[];
55extern struct ftrace_event_call __stop_ftrace_events[];
56
57#endif /* _LINUX_KERNEL_TRACE_EVENTS_H */
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
new file mode 100644
index 000000000000..fd3bf9382d37
--- /dev/null
+++ b/kernel/trace/trace_events_stage_1.h
@@ -0,0 +1,34 @@
1/*
2 * Stage 1 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * struct ftrace_raw_<call> {
7 * struct trace_entry ent;
8 * <type> <item>;
9 * [...]
10 * };
11 *
12 * The <type> <item> is created by the TRACE_FIELD(type, item, assign)
13 * macro. We simply do "type item;", and that will create the fields
14 * in the structure.
15 */
16
17#undef TRACE_FORMAT
18#define TRACE_FORMAT(call, proto, args, fmt)
19
20#undef TRACE_EVENT_FORMAT
21#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) \
22 struct ftrace_raw_##name { \
23 struct trace_entry ent; \
24 tstruct \
25 }; \
26 static struct ftrace_event_call event_##name
27
28#undef TRACE_STRUCT
29#define TRACE_STRUCT(args...) args
30
31#define TRACE_FIELD(type, item, assign) \
32 type item;
33
34#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
new file mode 100644
index 000000000000..3eaaef5f19e1
--- /dev/null
+++ b/kernel/trace/trace_events_stage_2.h
@@ -0,0 +1,72 @@
1/*
2 * Stage 2 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * enum print_line_t
7 * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
8 * {
9 * struct trace_seq *s = &iter->seq;
10 * struct ftrace_raw_<call> *field; <-- defined in stage 1
11 * struct trace_entry *entry;
12 * int ret;
13 *
14 * entry = iter->ent;
15 *
16 * if (entry->type != event_<call>.id) {
17 * WARN_ON_ONCE(1);
18 * return TRACE_TYPE_UNHANDLED;
19 * }
20 *
21 * field = (typeof(field))entry;
22 *
23 * ret = trace_seq_printf(s, <TPRAWFMT> "%s", <ARGS> "\n");
24 * if (!ret)
25 * return TRACE_TYPE_PARTIAL_LINE;
26 *
27 * return TRACE_TYPE_HANDLED;
28 * }
29 *
30 * This is the method used to print the raw event to the trace
31 * output format. Note, this is not needed if the data is read
32 * in binary.
33 */
34
35#undef TRACE_STRUCT
36#define TRACE_STRUCT(args...) args
37
38#undef TRACE_FIELD
39#define TRACE_FIELD(type, item, assign) \
40 field->item,
41
42
43#undef TPRAWFMT
44#define TPRAWFMT(args...) args
45
46#undef TRACE_EVENT_FORMAT
47#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
48enum print_line_t \
49ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
50{ \
51 struct trace_seq *s = &iter->seq; \
52 struct ftrace_raw_##call *field; \
53 struct trace_entry *entry; \
54 int ret; \
55 \
56 entry = iter->ent; \
57 \
58 if (entry->type != event_##call.id) { \
59 WARN_ON_ONCE(1); \
60 return TRACE_TYPE_UNHANDLED; \
61 } \
62 \
63 field = (typeof(field))entry; \
64 \
65 ret = trace_seq_printf(s, tpfmt "%s", tstruct "\n"); \
66 if (!ret) \
67 return TRACE_TYPE_PARTIAL_LINE; \
68 \
69 return TRACE_TYPE_HANDLED; \
70}
71
72#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
new file mode 100644
index 000000000000..7a161c49deb4
--- /dev/null
+++ b/kernel/trace/trace_events_stage_3.h
@@ -0,0 +1,219 @@
1/*
2 * Stage 3 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * static void ftrace_event_<call>(proto)
7 * {
8 * event_trace_printk(_RET_IP_, "(<call>) " <fmt>);
9 * }
10 *
11 * static int ftrace_reg_event_<call>(void)
12 * {
13 * int ret;
14 *
15 * ret = register_trace_<call>(ftrace_event_<call>);
16 * if (!ret)
17 * pr_info("event trace: Could not activate trace point "
18 * "probe to <call>");
19 * return ret;
20 * }
21 *
22 * static void ftrace_unreg_event_<call>(void)
23 * {
24 * unregister_trace_<call>(ftrace_event_<call>);
25 * }
26 *
27 * For those macros defined with TRACE_FORMAT:
28 *
29 * static struct ftrace_event_call __used
30 * __attribute__((__aligned__(4)))
31 * __attribute__((section("_ftrace_events"))) event_<call> = {
32 * .name = "<call>",
33 * .regfunc = ftrace_reg_event_<call>,
34 * .unregfunc = ftrace_unreg_event_<call>,
35 * }
36 *
37 *
38 * For those macros defined with TRACE_EVENT_FORMAT:
39 *
40 * static struct ftrace_event_call event_<call>;
41 *
42 * static void ftrace_raw_event_<call>(proto)
43 * {
44 * struct ring_buffer_event *event;
45 * struct ftrace_raw_<call> *entry; <-- defined in stage 1
46 * unsigned long irq_flags;
47 * int pc;
48 *
49 * local_save_flags(irq_flags);
50 * pc = preempt_count();
51 *
52 * event = trace_current_buffer_lock_reserve(event_<call>.id,
53 * sizeof(struct ftrace_raw_<call>),
54 * irq_flags, pc);
55 * if (!event)
56 * return;
57 * entry = ring_buffer_event_data(event);
58 *
59 * <tstruct>; <-- Here we assign the entries by the TRACE_FIELD.
60 *
61 * trace_current_buffer_unlock_commit(event, irq_flags, pc);
62 * }
63 *
64 * static int ftrace_raw_reg_event_<call>(void)
65 * {
66 * int ret;
67 *
68 * ret = register_trace_<call>(ftrace_raw_event_<call>);
69 * if (!ret)
70 * pr_info("event trace: Could not activate trace point "
71 * "probe to <call>");
72 * return ret;
73 * }
74 *
75 * static void ftrace_unreg_event_<call>(void)
76 * {
77 * unregister_trace_<call>(ftrace_raw_event_<call>);
78 * }
79 *
80 * static struct trace_event ftrace_event_type_<call> = {
81 * .trace = ftrace_raw_output_<call>, <-- stage 2
82 * };
83 *
84 * static int ftrace_raw_init_event_<call>(void)
85 * {
86 * int id;
87 *
88 * id = register_ftrace_event(&ftrace_event_type_<call>);
89 * if (!id)
90 * return -ENODEV;
91 * event_<call>.id = id;
92 * return 0;
93 * }
94 *
95 * static struct ftrace_event_call __used
96 * __attribute__((__aligned__(4)))
97 * __attribute__((section("_ftrace_events"))) event_<call> = {
98 * .name = "<call>",
99 * .regfunc = ftrace_reg_event_<call>,
100 * .unregfunc = ftrace_unreg_event_<call>,
101 * .raw_init = ftrace_raw_init_event_<call>,
102 * .raw_reg = ftrace_raw_reg_event_<call>,
103 * .raw_unreg = ftrace_raw_unreg_event_<call>,
104 * }
105 *
106 */
107
108#undef TPFMT
109#define TPFMT(fmt, args...) fmt "\n", ##args
110
111#define _TRACE_FORMAT(call, proto, args, fmt) \
112static void ftrace_event_##call(proto) \
113{ \
114 event_trace_printk(_RET_IP_, "(" #call ") " fmt); \
115} \
116 \
117static int ftrace_reg_event_##call(void) \
118{ \
119 int ret; \
120 \
121 ret = register_trace_##call(ftrace_event_##call); \
122 if (!ret) \
123 pr_info("event trace: Could not activate trace point " \
124 "probe to " #call); \
125 return ret; \
126} \
127 \
128static void ftrace_unreg_event_##call(void) \
129{ \
130 unregister_trace_##call(ftrace_event_##call); \
131} \
132
133
134#undef TRACE_FORMAT
135#define TRACE_FORMAT(call, proto, args, fmt) \
136_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
137static struct ftrace_event_call __used \
138__attribute__((__aligned__(4))) \
139__attribute__((section("_ftrace_events"))) event_##call = { \
140 .name = #call, \
141 .system = STR(TRACE_SYSTEM), \
142 .regfunc = ftrace_reg_event_##call, \
143 .unregfunc = ftrace_unreg_event_##call, \
144}
145
146#undef TRACE_FIELD
147#define TRACE_FIELD(type, item, assign)\
148 entry->item = assign;
149
150#undef TRACE_EVENT_FORMAT
151#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
152_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
153 \
154static struct ftrace_event_call event_##call; \
155 \
156static void ftrace_raw_event_##call(proto) \
157{ \
158 struct ring_buffer_event *event; \
159 struct ftrace_raw_##call *entry; \
160 unsigned long irq_flags; \
161 int pc; \
162 \
163 local_save_flags(irq_flags); \
164 pc = preempt_count(); \
165 \
166 event = trace_current_buffer_lock_reserve(event_##call.id, \
167 sizeof(struct ftrace_raw_##call), \
168 irq_flags, pc); \
169 if (!event) \
170 return; \
171 entry = ring_buffer_event_data(event); \
172 \
173 tstruct; \
174 \
175 trace_current_buffer_unlock_commit(event, irq_flags, pc); \
176} \
177 \
178static int ftrace_raw_reg_event_##call(void) \
179{ \
180 int ret; \
181 \
182 ret = register_trace_##call(ftrace_raw_event_##call); \
183 if (!ret) \
184 pr_info("event trace: Could not activate trace point " \
185 "probe to " #call); \
186 return ret; \
187} \
188 \
189static void ftrace_raw_unreg_event_##call(void) \
190{ \
191 unregister_trace_##call(ftrace_raw_event_##call); \
192} \
193 \
194static struct trace_event ftrace_event_type_##call = { \
195 .trace = ftrace_raw_output_##call, \
196}; \
197 \
198static int ftrace_raw_init_event_##call(void) \
199{ \
200 int id; \
201 \
202 id = register_ftrace_event(&ftrace_event_type_##call); \
203 if (!id) \
204 return -ENODEV; \
205 event_##call.id = id; \
206 return 0; \
207} \
208 \
209static struct ftrace_event_call __used \
210__attribute__((__aligned__(4))) \
211__attribute__((section("_ftrace_events"))) event_##call = { \
212 .name = #call, \
213 .system = STR(TRACE_SYSTEM), \
214 .regfunc = ftrace_reg_event_##call, \
215 .unregfunc = ftrace_unreg_event_##call, \
216 .raw_init = ftrace_raw_init_event_##call, \
217 .raw_reg = ftrace_raw_reg_event_##call, \
218 .raw_unreg = ftrace_raw_unreg_event_##call, \
219}