aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephane Eranian <eranian@google.com>2013-01-24 10:10:36 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2013-04-01 11:21:28 -0400
commitf4f7e28d0e813ddb997f49ae718ddf98db972292 (patch)
tree63a5341eff8d5147909ad7bcbf80a07a0a5fe098
parentccf49bfc6bb1025788637417780e9f1eeae9fc37 (diff)
perf report: Add support for mem access profiling
This patch adds the --mem-mode option to perf report. This mode requires a perf.data file created with memory access samples. Signed-off-by: Stephane Eranian <eranian@google.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung.kim@lge.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1359040242-8269-13-git-send-email-eranian@google.com [ Removed duplicates in the --sort help, man page needs updating, Fixed minor conflict with 328ccda "perf report: Add --no-demangle option" ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/builtin-report.c135
1 files changed, 131 insertions, 4 deletions
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index e31f070abe2f..a20550c9cd68 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -46,6 +46,7 @@ struct perf_report {
46 bool show_full_info; 46 bool show_full_info;
47 bool show_threads; 47 bool show_threads;
48 bool inverted_callchain; 48 bool inverted_callchain;
49 bool mem_mode;
49 struct perf_read_values show_threads_values; 50 struct perf_read_values show_threads_values;
50 const char *pretty_printing_style; 51 const char *pretty_printing_style;
51 symbol_filter_t annotate_init; 52 symbol_filter_t annotate_init;
@@ -64,6 +65,99 @@ static int perf_report_config(const char *var, const char *value, void *cb)
64 return perf_default_config(var, value, cb); 65 return perf_default_config(var, value, cb);
65} 66}
66 67
68static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
69 struct addr_location *al,
70 struct perf_sample *sample,
71 struct perf_evsel *evsel,
72 struct machine *machine,
73 union perf_event *event)
74{
75 struct perf_report *rep = container_of(tool, struct perf_report, tool);
76 struct symbol *parent = NULL;
77 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
78 int err = 0;
79 struct hist_entry *he;
80 struct mem_info *mi, *mx;
81 uint64_t cost;
82
83 if ((sort__has_parent || symbol_conf.use_callchain) &&
84 sample->callchain) {
85 err = machine__resolve_callchain(machine, evsel, al->thread,
86 sample, &parent);
87 if (err)
88 return err;
89 }
90
91 mi = machine__resolve_mem(machine, al->thread, sample, cpumode);
92 if (!mi)
93 return -ENOMEM;
94
95 if (rep->hide_unresolved && !al->sym)
96 return 0;
97
98 cost = sample->weight;
99 if (!cost)
100 cost = 1;
101
102 /*
103 * must pass period=weight in order to get the correct
104 * sorting from hists__collapse_resort() which is solely
105 * based on periods. We want sorting be done on nr_events * weight
106 * and this is indirectly achieved by passing period=weight here
107 * and the he_stat__add_period() function.
108 */
109 he = __hists__add_mem_entry(&evsel->hists, al, parent, mi, cost, cost);
110 if (!he)
111 return -ENOMEM;
112
113 /*
114 * In the newt browser, we are doing integrated annotation,
115 * so we don't allocate the extra space needed because the stdio
116 * code will not use it.
117 */
118 if (sort__has_sym && he->ms.sym && use_browser > 0) {
119 struct annotation *notes = symbol__annotation(he->ms.sym);
120
121 assert(evsel != NULL);
122
123 if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
124 goto out;
125
126 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
127 if (err)
128 goto out;
129 }
130
131 if (sort__has_sym && he->mem_info->daddr.sym && use_browser > 0) {
132 struct annotation *notes;
133
134 mx = he->mem_info;
135
136 notes = symbol__annotation(mx->daddr.sym);
137 if (notes->src == NULL && symbol__alloc_hist(mx->daddr.sym) < 0)
138 goto out;
139
140 err = symbol__inc_addr_samples(mx->daddr.sym,
141 mx->daddr.map,
142 evsel->idx,
143 mx->daddr.al_addr);
144 if (err)
145 goto out;
146 }
147
148 evsel->hists.stats.total_period += cost;
149 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
150 err = 0;
151
152 if (symbol_conf.use_callchain) {
153 err = callchain_append(he->callchain,
154 &callchain_cursor,
155 sample->period);
156 }
157out:
158 return err;
159}
160
67static int perf_report__add_branch_hist_entry(struct perf_tool *tool, 161static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
68 struct addr_location *al, 162 struct addr_location *al,
69 struct perf_sample *sample, 163 struct perf_sample *sample,
@@ -220,6 +314,12 @@ static int process_sample_event(struct perf_tool *tool,
220 pr_debug("problem adding lbr entry, skipping event\n"); 314 pr_debug("problem adding lbr entry, skipping event\n");
221 return -1; 315 return -1;
222 } 316 }
317 } else if (rep->mem_mode == 1) {
318 if (perf_report__add_mem_hist_entry(tool, &al, sample,
319 evsel, machine, event)) {
320 pr_debug("problem adding mem entry, skipping event\n");
321 return -1;
322 }
223 } else { 323 } else {
224 if (al.map != NULL) 324 if (al.map != NULL)
225 al.map->dso->hit = 1; 325 al.map->dso->hit = 1;
@@ -303,7 +403,8 @@ static void sig_handler(int sig __maybe_unused)
303 session_done = 1; 403 session_done = 1;
304} 404}
305 405
306static size_t hists__fprintf_nr_sample_events(struct hists *self, 406static size_t hists__fprintf_nr_sample_events(struct perf_report *rep,
407 struct hists *self,
307 const char *evname, FILE *fp) 408 const char *evname, FILE *fp)
308{ 409{
309 size_t ret; 410 size_t ret;
@@ -331,7 +432,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
331 if (evname != NULL) 432 if (evname != NULL)
332 ret += fprintf(fp, " of event '%s'", evname); 433 ret += fprintf(fp, " of event '%s'", evname);
333 434
334 ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); 435 if (rep->mem_mode) {
436 ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
437 ret += fprintf(fp, "\n# Sort order : %s", sort_order);
438 } else
439 ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events);
335 return ret + fprintf(fp, "\n#\n"); 440 return ret + fprintf(fp, "\n#\n");
336} 441}
337 442
@@ -349,7 +454,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
349 !perf_evsel__is_group_leader(pos)) 454 !perf_evsel__is_group_leader(pos))
350 continue; 455 continue;
351 456
352 hists__fprintf_nr_sample_events(hists, evname, stdout); 457 hists__fprintf_nr_sample_events(rep, hists, evname, stdout);
353 hists__fprintf(hists, true, 0, 0, stdout); 458 hists__fprintf(hists, true, 0, 0, stdout);
354 fprintf(stdout, "\n\n"); 459 fprintf(stdout, "\n\n");
355 } 460 }
@@ -646,7 +751,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
646 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 751 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
647 "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," 752 "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
648 " dso_to, dso_from, symbol_to, symbol_from, mispredict," 753 " dso_to, dso_from, symbol_to, symbol_from, mispredict,"
649 " weight, local_weight"), 754 " weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, "
755 "snoop, locked"),
650 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, 756 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
651 "Show sample percentage for different cpu modes"), 757 "Show sample percentage for different cpu modes"),
652 OPT_STRING('p', "parent", &parent_pattern, "regex", 758 OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -696,6 +802,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
696 "objdump binary to use for disassembly and annotations"), 802 "objdump binary to use for disassembly and annotations"),
697 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, 803 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
698 "Disable symbol demangling"), 804 "Disable symbol demangling"),
805 OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
699 OPT_END() 806 OPT_END()
700 }; 807 };
701 808
@@ -753,6 +860,18 @@ repeat:
753 "dso_to,symbol_to"; 860 "dso_to,symbol_to";
754 861
755 } 862 }
863 if (report.mem_mode) {
864 if (sort__branch_mode == 1) {
865 fprintf(stderr, "branch and mem mode incompatible\n");
866 goto error;
867 }
868 /*
869 * if no sort_order is provided, then specify
870 * mem-mode specific order
871 */
872 if (sort_order == default_sort_order)
873 sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
874 }
756 875
757 if (setup_sorting() < 0) 876 if (setup_sorting() < 0)
758 usage_with_options(report_usage, options); 877 usage_with_options(report_usage, options);
@@ -818,6 +937,14 @@ repeat:
818 sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); 937 sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
819 sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); 938 sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
820 } else { 939 } else {
940 if (report.mem_mode) {
941 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "symbol_daddr", stdout);
942 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso_daddr", stdout);
943 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "mem", stdout);
944 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "local_weight", stdout);
945 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "tlb", stdout);
946 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "snoop", stdout);
947 }
821 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); 948 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
822 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); 949 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
823 } 950 }