aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace/trace_functions_graph.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-02-19 20:49:41 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-02-19 20:49:41 -0500
commit8f55cea410dbc56114bb71a3742032070c8108d0 (patch)
tree59605f0ee961274b22f91add33f5c32459471a83 /kernel/trace/trace_functions_graph.c
parentb7133a9a103655cda254987a3c0975fd9d8c443f (diff)
parente259514eef764a5286873618e34c560ecb6cff13 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf changes from Ingo Molnar: "There are lots of improvements, the biggest changes are: Main kernel side changes: - Improve uprobes performance by adding 'pre-filtering' support, by Oleg Nesterov. - Make some POWER7 events available in sysfs, equivalent to what was done on x86, from Sukadev Bhattiprolu. - tracing updates by Steve Rostedt - mostly misc fixes and smaller improvements. - Use perf/event tracing to report PCI Express advanced errors, by Tony Luck. - Enable northbridge performance counters on AMD family 15h, by Jacob Shin. - This tracing commit: tracing: Remove the extra 4 bytes of padding in events changes the ABI. All involved parties (PowerTop in particular) seem to agree that it's safe to do now with the introduction of libtraceevent, but the devil is in the details ... Main tooling side changes: - Add 'event group view', from Namyung Kim: To use it, 'perf record' should group events when recording. And then perf report parses the saved group relation from file header and prints them together if --group option is provided. You can use the 'perf evlist' command to see event group information: $ perf record -e '{ref-cycles,cycles}' noploop 1 [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.385 MB perf.data (~16807 samples) ] $ perf evlist --group {ref-cycles,cycles} With this example, default perf report will show you each event separately. You can use --group option to enable event group view: $ perf report --group ... # group: {ref-cycles,cycles} # ======== # Samples: 7K of event 'anon group { ref-cycles, cycles }' # Event count (approx.): 6876107743 # # Overhead Command Shared Object Symbol # ................ ....... ................. .......................... 99.84% 99.76% noploop noploop [.] main 0.07% 0.00% noploop ld-2.15.so [.] strcmp 0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del 0.03% 0.03% noploop [kernel.kallsyms] [k] sched_clock_cpu 0.02% 0.00% noploop [kernel.kallsyms] [k] account_user_time 0.01% 0.00% noploop [kernel.kallsyms] [k] __alloc_pages_nodemask 0.00% 0.00% noploop [kernel.kallsyms] [k] native_write_msr_safe 0.00% 0.11% noploop [kernel.kallsyms] [k] _raw_spin_lock 0.00% 0.06% noploop [kernel.kallsyms] [k] find_get_page 0.00% 0.02% noploop [kernel.kallsyms] [k] rcu_check_callbacks 0.00% 0.02% noploop [kernel.kallsyms] [k] __current_kernel_time As you can see the Overhead column now contains both of ref-cycles and cycles and header line shows group information also - 'anon group { ref-cycles, cycles }'. The output is sorted by period of group leader first. - Initial GTK+ annotate browser, from Namhyung Kim. - Add option for runtime switching perf data file in perf report, just press 's' and a menu with the valid files found in the current directory will be presented, from Feng Tang. - Add support to display whole group data for raw columns, from Jiri Olsa. - Add per processor socket count aggregation in perf stat, from Stephane Eranian. - Add interval printing in 'perf stat', from Stephane Eranian. - 'perf test' improvements - Add support for wildcards in tracepoint system name, from Jiri Olsa. - Add anonymous huge page recognition, from Joshua Zhu. - perf build-id cache now can show DSOs present in a perf.data file that are not in the cache, to integrate with build-id servers being put in place by organizations such as Fedora. - perf top now shares more of the evsel config/creation routines with 'record', paving the way for further integration like 'top' snapshots, etc. - perf top now supports DWARF callchains. - Fix mmap limitations on 32-bit, fix from David Miller. - 'perf bench numa mem' NUMA performance measurement suite - ... and lots of fixes, performance improvements, cleanups and other improvements I failed to list - see the shortlog and git log for details." * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (270 commits) perf/x86/amd: Enable northbridge performance counters on AMD family 15h perf/hwbp: Fix cleanup in case of kzalloc failure perf tools: Fix build with bison 2.3 and older. perf tools: Limit unwind support to x86 archs perf annotate: Make it to be able to skip unannotatable symbols perf gtk/annotate: Fail early if it can't annotate perf gtk/annotate: Show source lines with gray color perf gtk/annotate: Support multiple event annotation perf ui/gtk: Implement basic GTK2 annotation browser perf annotate: Fix warning message on a missing vmlinux perf buildid-cache: Add --update option uprobes/perf: Avoid uprobe_apply() whenever possible uprobes/perf: Teach trace_uprobe/perf code to use UPROBE_HANDLER_REMOVE uprobes/perf: Teach trace_uprobe/perf code to pre-filter uprobes/perf: Teach trace_uprobe/perf code to track the active perf_event's uprobes: Introduce uprobe_apply() perf: Introduce hw_perf_event->tp_target and ->tp_list uprobes/perf: Always increment trace_uprobe->nhit uprobes/tracing: Kill uprobe_trace_consumer, embed uprobe_consumer into trace_uprobe uprobes/tracing: Introduce is_trace_uprobe_enabled() ...
Diffstat (limited to 'kernel/trace/trace_functions_graph.c')
-rw-r--r--kernel/trace/trace_functions_graph.c68
1 files changed, 65 insertions, 3 deletions
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4edb4b74eb7e..39ada66389cc 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -47,6 +47,8 @@ struct fgraph_data {
47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
48#define TRACE_GRAPH_PRINT_IRQS 0x40 48#define TRACE_GRAPH_PRINT_IRQS 0x40
49 49
50static unsigned int max_depth;
51
50static struct tracer_opt trace_opts[] = { 52static struct tracer_opt trace_opts[] = {
51 /* Display overruns? (for self-debug purpose) */ 53 /* Display overruns? (for self-debug purpose) */
52 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, 54 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
@@ -189,10 +191,16 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
189 191
190 ftrace_pop_return_trace(&trace, &ret, frame_pointer); 192 ftrace_pop_return_trace(&trace, &ret, frame_pointer);
191 trace.rettime = trace_clock_local(); 193 trace.rettime = trace_clock_local();
192 ftrace_graph_return(&trace);
193 barrier(); 194 barrier();
194 current->curr_ret_stack--; 195 current->curr_ret_stack--;
195 196
197 /*
198 * The trace should run after decrementing the ret counter
199 * in case an interrupt were to come in. We don't want to
200 * lose the interrupt if max_depth is set.
201 */
202 ftrace_graph_return(&trace);
203
196 if (unlikely(!ret)) { 204 if (unlikely(!ret)) {
197 ftrace_graph_stop(); 205 ftrace_graph_stop();
198 WARN_ON(1); 206 WARN_ON(1);
@@ -250,8 +258,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
250 return 0; 258 return 0;
251 259
252 /* trace it when it is-nested-in or is a function enabled. */ 260 /* trace it when it is-nested-in or is a function enabled. */
253 if (!(trace->depth || ftrace_graph_addr(trace->func)) || 261 if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
254 ftrace_graph_ignore_irqs()) 262 ftrace_graph_ignore_irqs()) ||
263 (max_depth && trace->depth >= max_depth))
255 return 0; 264 return 0;
256 265
257 local_irq_save(flags); 266 local_irq_save(flags);
@@ -1457,6 +1466,59 @@ static struct tracer graph_trace __read_mostly = {
1457#endif 1466#endif
1458}; 1467};
1459 1468
1469
1470static ssize_t
1471graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
1472 loff_t *ppos)
1473{
1474 unsigned long val;
1475 int ret;
1476
1477 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1478 if (ret)
1479 return ret;
1480
1481 max_depth = val;
1482
1483 *ppos += cnt;
1484
1485 return cnt;
1486}
1487
1488static ssize_t
1489graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
1490 loff_t *ppos)
1491{
1492 char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/
1493 int n;
1494
1495 n = sprintf(buf, "%d\n", max_depth);
1496
1497 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
1498}
1499
1500static const struct file_operations graph_depth_fops = {
1501 .open = tracing_open_generic,
1502 .write = graph_depth_write,
1503 .read = graph_depth_read,
1504 .llseek = generic_file_llseek,
1505};
1506
1507static __init int init_graph_debugfs(void)
1508{
1509 struct dentry *d_tracer;
1510
1511 d_tracer = tracing_init_dentry();
1512 if (!d_tracer)
1513 return 0;
1514
1515 trace_create_file("max_graph_depth", 0644, d_tracer,
1516 NULL, &graph_depth_fops);
1517
1518 return 0;
1519}
1520fs_initcall(init_graph_debugfs);
1521
1460static __init int init_graph_trace(void) 1522static __init int init_graph_trace(void)
1461{ 1523{
1462 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); 1524 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);