aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Rostedt (Red Hat) <rostedt@goodmis.org>2013-07-12 17:07:27 -0400
committerSteven Rostedt <rostedt@goodmis.org>2013-07-26 13:39:44 -0400
commit102c9323c35a83789ad5ebd3c45fa8fb389add88 (patch)
tree1774b9144e5a00a49358475c1aca9ad33713e417
parent3b2f64d00c46e1e4e9bd0bb9bb12619adac27a4b (diff)
tracing: Add __tracepoint_string() to export string pointers
There are several tracepoints (mostly in RCU) that reference a string pointer and use the print format of "%s" to display the string that exists in the kernel, instead of copying the actual string to the ring buffer (this saves time and ring buffer space).

But this has an issue with userspace tools that read the binary buffers: they have the address of the string but have no access to what the string itself is. The end result is just output that looks like:

 rcu_dyntick: ffffffff818adeaa 1 0
 rcu_dyntick: ffffffff818adeb5 0 140000000000000
 rcu_dyntick: ffffffff818adeb5 0 140000000000000
 rcu_utilization: ffffffff8184333b
 rcu_utilization: ffffffff8184333b

The above is pretty useless when read by the userspace tools. Ideally we would want something that looks like this:

 rcu_dyntick: Start 1 0
 rcu_dyntick: End 0 140000000000000
 rcu_dyntick: Start 140000000000000 0
 rcu_callback: rcu_preempt rhp=0xffff880037aff710 func=put_cred_rcu 0/4
 rcu_callback: rcu_preempt rhp=0xffff880078961980 func=file_free_rcu 0/5
 rcu_dyntick: End 0 1

trace_printk(), which also only stores the address of the format string instead of recording the string into the buffer itself, exports the mapping of kernel addresses to format strings via the printk_formats file in the debugfs tracing directory.

The tracepoint strings can use this same method and output their text to the same file, and the userspace tools will be able to decipher the addresses without any modification.

The tracepoint strings need their own section to save the strings, because the trace_printk section will cause the trace_printk() buffers to be allocated if anything exists within the section. As trace_printk() is only used for debugging and should never exist in a production kernel, we cannot use the trace_printk sections.

Add a new tracepoint_str section that is also examined when generating the output of the printk_formats file.

Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
-rw-r--r--include/asm-generic/vmlinux.lds.h7
-rw-r--r--include/linux/ftrace_event.h34
-rw-r--r--kernel/trace/trace.h3
-rw-r--r--kernel/trace/trace_printk.c19
4 files changed, 62 insertions, 1 deletions
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 69732d279e8b..83e2c31e8b00 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -122,8 +122,12 @@
122#define TRACE_PRINTKS() VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .; \ 122#define TRACE_PRINTKS() VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .; \
123 *(__trace_printk_fmt) /* Trace_printk fmt' pointer */ \ 123 *(__trace_printk_fmt) /* Trace_printk fmt' pointer */ \
124 VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .; 124 VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .;
125#define TRACEPOINT_STR() VMLINUX_SYMBOL(__start___tracepoint_str) = .; \
126 *(__tracepoint_str) /* Tracepoint string pointers */ \
127 VMLINUX_SYMBOL(__stop___tracepoint_str) = .;
125#else 128#else
126#define TRACE_PRINTKS() 129#define TRACE_PRINTKS()
130#define TRACEPOINT_STR()
127#endif 131#endif
128 132
129#ifdef CONFIG_FTRACE_SYSCALLS 133#ifdef CONFIG_FTRACE_SYSCALLS
@@ -190,7 +194,8 @@
190 VMLINUX_SYMBOL(__stop___verbose) = .; \ 194 VMLINUX_SYMBOL(__stop___verbose) = .; \
191 LIKELY_PROFILE() \ 195 LIKELY_PROFILE() \
192 BRANCH_PROFILE() \ 196 BRANCH_PROFILE() \
193 TRACE_PRINTKS() 197 TRACE_PRINTKS() \
198 TRACEPOINT_STR()
194 199
195/* 200/*
196 * Data section helpers 201 * Data section helpers
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4372658c73ae..81af18a75f4d 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -357,6 +357,40 @@ do { \
357 __trace_printk(ip, fmt, ##args); \ 357 __trace_printk(ip, fmt, ##args); \
358} while (0) 358} while (0)
359 359
360/**
361 * tracepoint_string - register constant persistent string to trace system
362 * @str: a constant persistent string that will be referenced in tracepoints
363 *
364 * If constant strings are being used in tracepoints, it is faster and
365 * more efficient to just save the pointer to the string and reference
366 * that with a printf "%s" instead of saving the string in the ring buffer
367 * and wasting space and time.
368 *
369 * The problem with the above approach is that userspace tools that read
370 * the binary output of the trace buffers do not have access to the string.
371 * Instead they just show the address of the string which is not very
372 * useful to users.
373 *
374 * With tracepoint_string(), the string will be registered to the tracing
375 * system and exported to userspace via the debugfs/tracing/printk_formats
376 * file that maps the string address to the string text. This way userspace
377 * tools that read the binary buffers have a way to map the pointers to
378 * the ASCII strings they represent.
379 *
380 * The @str used must be a constant string and persistent as it would not
381 * make sense to show a string that no longer exists. But it is still fine
382 * to be used with modules, because when modules are unloaded, if they
383 * had tracepoints, the ring buffers are cleared too. As long as the string
384 * does not change during the life of the module, it is fine to use
385 * tracepoint_string() within a module.
386 */
387#define tracepoint_string(str) \
388 ({ \
389 static const char *___tp_str __tracepoint_string = str; \
390 ___tp_str; \
391 })
392#define __tracepoint_string __attribute__((section("__tracepoint_str")))
393
360#ifdef CONFIG_PERF_EVENTS 394#ifdef CONFIG_PERF_EVENTS
361struct perf_event; 395struct perf_event;
362 396
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4a4f6e1828b6..ba321f12df8c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1022,6 +1022,9 @@ extern struct list_head ftrace_events;
1022extern const char *__start___trace_bprintk_fmt[]; 1022extern const char *__start___trace_bprintk_fmt[];
1023extern const char *__stop___trace_bprintk_fmt[]; 1023extern const char *__stop___trace_bprintk_fmt[];
1024 1024
1025extern const char *__start___tracepoint_str[];
1026extern const char *__stop___tracepoint_str[];
1027
1025void trace_printk_init_buffers(void); 1028void trace_printk_init_buffers(void);
1026void trace_printk_start_comm(void); 1029void trace_printk_start_comm(void);
1027int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); 1030int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index a9077c1b4ad3..2900817ba65c 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -244,12 +244,31 @@ static const char **find_next(void *v, loff_t *pos)
244{ 244{
245 const char **fmt = v; 245 const char **fmt = v;
246 int start_index; 246 int start_index;
247 int last_index;
247 248
248 start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt; 249 start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt;
249 250
250 if (*pos < start_index) 251 if (*pos < start_index)
251 return __start___trace_bprintk_fmt + *pos; 252 return __start___trace_bprintk_fmt + *pos;
252 253
254 /*
255 * The __tracepoint_str section is treated the same as the
256 * __trace_printk_fmt section. The difference is that the
257 * __trace_printk_fmt section should only be used by trace_printk()
258 * in a debugging environment, as if anything exists in that section
259 * the trace_printk() helper buffers are allocated, which would just
260 * waste space in a production environment.
261 *
262 * The __tracepoint_str sections on the other hand are used by
263 * tracepoints which need to map the string pointers they record to
264 * the ASCII text of those strings for userspace.
265 */
266 last_index = start_index;
267 start_index = __stop___tracepoint_str - __start___tracepoint_str;
268
269 if (*pos < last_index + start_index)
270 return __start___tracepoint_str + (*pos - last_index);
271
253 return find_next_mod_format(start_index, v, fmt, pos); 272 return find_next_mod_format(start_index, v, fmt, pos);
254} 273}
255 274