diff options
author | Stephane Eranian <eranian@google.com> | 2013-01-24 10:10:36 -0500 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2013-04-01 11:21:28 -0400 |
commit | f4f7e28d0e813ddb997f49ae718ddf98db972292 (patch) | |
tree | 63a5341eff8d5147909ad7bcbf80a07a0a5fe098 /tools/perf/builtin-report.c | |
parent | ccf49bfc6bb1025788637417780e9f1eeae9fc37 (diff) |
perf report: Add support for mem access profiling
This patch adds the --mem-mode option to perf report.
This mode requires a perf.data file created with memory access samples.
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1359040242-8269-13-git-send-email-eranian@google.com
[ Removed duplicates in the --sort help, man page needs updating,
Fixed minor conflict with 328ccda "perf report: Add --no-demangle option" ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-report.c')
-rw-r--r-- | tools/perf/builtin-report.c | 135 |
1 files changed, 131 insertions, 4 deletions
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e31f070abe2f..a20550c9cd68 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -46,6 +46,7 @@ struct perf_report { | |||
46 | bool show_full_info; | 46 | bool show_full_info; |
47 | bool show_threads; | 47 | bool show_threads; |
48 | bool inverted_callchain; | 48 | bool inverted_callchain; |
49 | bool mem_mode; | ||
49 | struct perf_read_values show_threads_values; | 50 | struct perf_read_values show_threads_values; |
50 | const char *pretty_printing_style; | 51 | const char *pretty_printing_style; |
51 | symbol_filter_t annotate_init; | 52 | symbol_filter_t annotate_init; |
@@ -64,6 +65,99 @@ static int perf_report_config(const char *var, const char *value, void *cb) | |||
64 | return perf_default_config(var, value, cb); | 65 | return perf_default_config(var, value, cb); |
65 | } | 66 | } |
66 | 67 | ||
68 | static int perf_report__add_mem_hist_entry(struct perf_tool *tool, | ||
69 | struct addr_location *al, | ||
70 | struct perf_sample *sample, | ||
71 | struct perf_evsel *evsel, | ||
72 | struct machine *machine, | ||
73 | union perf_event *event) | ||
74 | { | ||
75 | struct perf_report *rep = container_of(tool, struct perf_report, tool); | ||
76 | struct symbol *parent = NULL; | ||
77 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | ||
78 | int err = 0; | ||
79 | struct hist_entry *he; | ||
80 | struct mem_info *mi, *mx; | ||
81 | uint64_t cost; | ||
82 | |||
83 | if ((sort__has_parent || symbol_conf.use_callchain) && | ||
84 | sample->callchain) { | ||
85 | err = machine__resolve_callchain(machine, evsel, al->thread, | ||
86 | sample, &parent); | ||
87 | if (err) | ||
88 | return err; | ||
89 | } | ||
90 | |||
91 | mi = machine__resolve_mem(machine, al->thread, sample, cpumode); | ||
92 | if (!mi) | ||
93 | return -ENOMEM; | ||
94 | |||
95 | if (rep->hide_unresolved && !al->sym) | ||
96 | return 0; | ||
97 | |||
98 | cost = sample->weight; | ||
99 | if (!cost) | ||
100 | cost = 1; | ||
101 | |||
102 | /* | ||
103 | * must pass period=weight in order to get the correct | ||
104 | * sorting from hists__collapse_resort() which is solely | ||
105 | * based on periods. We want sorting to be done on nr_events * weight | ||
106 | * and this is indirectly achieved by passing period=weight here | ||
107 | * and the he_stat__add_period() function. | ||
108 | */ | ||
109 | he = __hists__add_mem_entry(&evsel->hists, al, parent, mi, cost, cost); | ||
110 | if (!he) | ||
111 | return -ENOMEM; | ||
112 | |||
113 | /* | ||
114 | * In the newt browser, we are doing integrated annotation, | ||
115 | * so we don't allocate the extra space needed because the stdio | ||
116 | * code will not use it. | ||
117 | */ | ||
118 | if (sort__has_sym && he->ms.sym && use_browser > 0) { | ||
119 | struct annotation *notes = symbol__annotation(he->ms.sym); | ||
120 | |||
121 | assert(evsel != NULL); | ||
122 | |||
123 | if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0) | ||
124 | goto out; | ||
125 | |||
126 | err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); | ||
127 | if (err) | ||
128 | goto out; | ||
129 | } | ||
130 | |||
131 | if (sort__has_sym && he->mem_info->daddr.sym && use_browser > 0) { | ||
132 | struct annotation *notes; | ||
133 | |||
134 | mx = he->mem_info; | ||
135 | |||
136 | notes = symbol__annotation(mx->daddr.sym); | ||
137 | if (notes->src == NULL && symbol__alloc_hist(mx->daddr.sym) < 0) | ||
138 | goto out; | ||
139 | |||
140 | err = symbol__inc_addr_samples(mx->daddr.sym, | ||
141 | mx->daddr.map, | ||
142 | evsel->idx, | ||
143 | mx->daddr.al_addr); | ||
144 | if (err) | ||
145 | goto out; | ||
146 | } | ||
147 | |||
148 | evsel->hists.stats.total_period += cost; | ||
149 | hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); | ||
150 | err = 0; | ||
151 | |||
152 | if (symbol_conf.use_callchain) { | ||
153 | err = callchain_append(he->callchain, | ||
154 | &callchain_cursor, | ||
155 | sample->period); | ||
156 | } | ||
157 | out: | ||
158 | return err; | ||
159 | } | ||
160 | |||
67 | static int perf_report__add_branch_hist_entry(struct perf_tool *tool, | 161 | static int perf_report__add_branch_hist_entry(struct perf_tool *tool, |
68 | struct addr_location *al, | 162 | struct addr_location *al, |
69 | struct perf_sample *sample, | 163 | struct perf_sample *sample, |
@@ -220,6 +314,12 @@ static int process_sample_event(struct perf_tool *tool, | |||
220 | pr_debug("problem adding lbr entry, skipping event\n"); | 314 | pr_debug("problem adding lbr entry, skipping event\n"); |
221 | return -1; | 315 | return -1; |
222 | } | 316 | } |
317 | } else if (rep->mem_mode == 1) { | ||
318 | if (perf_report__add_mem_hist_entry(tool, &al, sample, | ||
319 | evsel, machine, event)) { | ||
320 | pr_debug("problem adding mem entry, skipping event\n"); | ||
321 | return -1; | ||
322 | } | ||
223 | } else { | 323 | } else { |
224 | if (al.map != NULL) | 324 | if (al.map != NULL) |
225 | al.map->dso->hit = 1; | 325 | al.map->dso->hit = 1; |
@@ -303,7 +403,8 @@ static void sig_handler(int sig __maybe_unused) | |||
303 | session_done = 1; | 403 | session_done = 1; |
304 | } | 404 | } |
305 | 405 | ||
306 | static size_t hists__fprintf_nr_sample_events(struct hists *self, | 406 | static size_t hists__fprintf_nr_sample_events(struct perf_report *rep, |
407 | struct hists *self, | ||
307 | const char *evname, FILE *fp) | 408 | const char *evname, FILE *fp) |
308 | { | 409 | { |
309 | size_t ret; | 410 | size_t ret; |
@@ -331,7 +432,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self, | |||
331 | if (evname != NULL) | 432 | if (evname != NULL) |
332 | ret += fprintf(fp, " of event '%s'", evname); | 433 | ret += fprintf(fp, " of event '%s'", evname); |
333 | 434 | ||
334 | ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); | 435 | if (rep->mem_mode) { |
436 | ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); | ||
437 | ret += fprintf(fp, "\n# Sort order : %s", sort_order); | ||
438 | } else | ||
439 | ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); | ||
335 | return ret + fprintf(fp, "\n#\n"); | 440 | return ret + fprintf(fp, "\n#\n"); |
336 | } | 441 | } |
337 | 442 | ||
@@ -349,7 +454,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, | |||
349 | !perf_evsel__is_group_leader(pos)) | 454 | !perf_evsel__is_group_leader(pos)) |
350 | continue; | 455 | continue; |
351 | 456 | ||
352 | hists__fprintf_nr_sample_events(hists, evname, stdout); | 457 | hists__fprintf_nr_sample_events(rep, hists, evname, stdout); |
353 | hists__fprintf(hists, true, 0, 0, stdout); | 458 | hists__fprintf(hists, true, 0, 0, stdout); |
354 | fprintf(stdout, "\n\n"); | 459 | fprintf(stdout, "\n\n"); |
355 | } | 460 | } |
@@ -646,7 +751,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
646 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | 751 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", |
647 | "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," | 752 | "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," |
648 | " dso_to, dso_from, symbol_to, symbol_from, mispredict," | 753 | " dso_to, dso_from, symbol_to, symbol_from, mispredict," |
649 | " weight, local_weight"), | 754 | " weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, " |
755 | "snoop, locked"), | ||
650 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, | 756 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, |
651 | "Show sample percentage for different cpu modes"), | 757 | "Show sample percentage for different cpu modes"), |
652 | OPT_STRING('p', "parent", &parent_pattern, "regex", | 758 | OPT_STRING('p', "parent", &parent_pattern, "regex", |
@@ -696,6 +802,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
696 | "objdump binary to use for disassembly and annotations"), | 802 | "objdump binary to use for disassembly and annotations"), |
697 | OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, | 803 | OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, |
698 | "Disable symbol demangling"), | 804 | "Disable symbol demangling"), |
805 | OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"), | ||
699 | OPT_END() | 806 | OPT_END() |
700 | }; | 807 | }; |
701 | 808 | ||
@@ -753,6 +860,18 @@ repeat: | |||
753 | "dso_to,symbol_to"; | 860 | "dso_to,symbol_to"; |
754 | 861 | ||
755 | } | 862 | } |
863 | if (report.mem_mode) { | ||
864 | if (sort__branch_mode == 1) { | ||
865 | fprintf(stderr, "branch and mem mode incompatible\n"); | ||
866 | goto error; | ||
867 | } | ||
868 | /* | ||
869 | * if no sort_order is provided, then specify | ||
870 | * mem-mode specific order | ||
871 | */ | ||
872 | if (sort_order == default_sort_order) | ||
873 | sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; | ||
874 | } | ||
756 | 875 | ||
757 | if (setup_sorting() < 0) | 876 | if (setup_sorting() < 0) |
758 | usage_with_options(report_usage, options); | 877 | usage_with_options(report_usage, options); |
@@ -818,6 +937,14 @@ repeat: | |||
818 | sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); | 937 | sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); |
819 | sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); | 938 | sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); |
820 | } else { | 939 | } else { |
940 | if (report.mem_mode) { | ||
941 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "symbol_daddr", stdout); | ||
942 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso_daddr", stdout); | ||
943 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "mem", stdout); | ||
944 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "local_weight", stdout); | ||
945 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "tlb", stdout); | ||
946 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "snoop", stdout); | ||
947 | } | ||
821 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); | 948 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); |
822 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); | 949 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); |
823 | } | 950 | } |