diff options
author | Stephane Eranian <eranian@google.com> | 2013-01-24 10:10:38 -0500 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2013-04-01 11:21:44 -0400 |
commit | 028f12ee6beff0961781c5ed3f740e5f3b56f781 (patch) | |
tree | 2bb9711e212df80f361868b9fc23a12f0b9fbb7d /tools/perf | |
parent | f4f7e28d0e813ddb997f49ae718ddf98db972292 (diff) |
perf tools: Add new mem command for memory access profiling
This new command is a wrapper on top of perf record and perf report to
make it easier to configure for memory access profiling.
To record loads:
$ perf mem -t load rec .....
To record stores:
$ perf mem -t store rec .....
To get the report:
$ perf mem -t load rep
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1359040242-8269-15-git-send-email-eranian@google.com
[ Fixed minor conflict with 66857b5 "Sort command-list.txt alphabetically" ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/Documentation/perf-mem.txt | 48 | ||||
-rw-r--r-- | tools/perf/Makefile | 1 | ||||
-rw-r--r-- | tools/perf/builtin-mem.c | 242 | ||||
-rw-r--r-- | tools/perf/builtin.h | 1 | ||||
-rw-r--r-- | tools/perf/command-list.txt | 1 | ||||
-rw-r--r-- | tools/perf/perf.c | 1 | ||||
-rw-r--r-- | tools/perf/util/hist.c | 1 |
7 files changed, 295 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt new file mode 100644 index 000000000000..888d51137fbe --- /dev/null +++ b/tools/perf/Documentation/perf-mem.txt | |||
@@ -0,0 +1,48 @@ | |||
1 | perf-mem(1) | ||
2 | =========== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-mem - Profile memory accesses | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf mem' [<options>] (record [<command>] | report) | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | "perf mem -t <TYPE> record" runs a command and gathers memory operation data | ||
16 | from it, into perf.data. Perf record options are accepted and are passed through. | ||
17 | |||
18 | "perf mem -t <TYPE> report" displays the result. It invokes perf report with the | ||
19 | right set of options to display a memory access profile. | ||
20 | |||
21 | OPTIONS | ||
22 | ------- | ||
23 | <command>...:: | ||
24 | Any command you can specify in a shell. | ||
25 | |||
26 | -t:: | ||
27 | --type=:: | ||
28 | Select the memory operation type: load or store (default: load) | ||
29 | |||
30 | -D:: | ||
31 | --dump-raw-samples:: | ||
32 | Dump the raw decoded samples on the screen in a format that is easy to parse with | ||
33 | one sample per line. | ||
34 | |||
35 | -x:: | ||
36 | --field-separator:: | ||
37 | Specify the field separator used when dumping raw samples (-D option). By default, | ||
38 | the separator is the space character. | ||
39 | |||
40 | -C:: | ||
41 | --cpu-list:: | ||
42 | Restrict the dump of raw samples to the CPUs provided via this option. Note that the | ||
43 | same option can be passed in record mode, where it is interpreted the same way as by | ||
44 | perf record. | ||
45 | |||
46 | SEE ALSO | ||
47 | -------- | ||
48 | linkperf:perf-record[1], linkperf:perf-report[1] | ||
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0230b75ed7f9..07feae773dc1 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -547,6 +547,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-lock.o | |||
547 | BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o | 547 | BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o |
548 | BUILTIN_OBJS += $(OUTPUT)builtin-inject.o | 548 | BUILTIN_OBJS += $(OUTPUT)builtin-inject.o |
549 | BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o | 549 | BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o |
550 | BUILTIN_OBJS += $(OUTPUT)builtin-mem.o | ||
550 | 551 | ||
551 | PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT) | 552 | PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT) |
552 | 553 | ||
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c new file mode 100644 index 000000000000..a8ff6d264e50 --- /dev/null +++ b/tools/perf/builtin-mem.c | |||
@@ -0,0 +1,242 @@ | |||
1 | #include "builtin.h" | ||
2 | #include "perf.h" | ||
3 | |||
4 | #include "util/parse-options.h" | ||
5 | #include "util/trace-event.h" | ||
6 | #include "util/tool.h" | ||
7 | #include "util/session.h" | ||
8 | |||
/* Operation names understood by the -t/--type option. */
#define MEM_OPERATION_LOAD	"load"
#define MEM_OPERATION_STORE	"store"

/* Operation selected with -t/--type; loads are profiled unless told otherwise. */
static const char *mem_operation = MEM_OPERATION_LOAD;
13 | |||
14 | struct perf_mem { | ||
15 | struct perf_tool tool; | ||
16 | char const *input_name; | ||
17 | symbol_filter_t annotate_init; | ||
18 | bool hide_unresolved; | ||
19 | bool dump_raw; | ||
20 | const char *cpu_list; | ||
21 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); | ||
22 | }; | ||
23 | |||
/* Usage text handed to parse_options()/usage_with_options(); NULL-terminated. */
static const char * const mem_usage[] = {
	"perf mem [<options>] {record <command> |report}",
	NULL
};
28 | |||
29 | static int __cmd_record(int argc, const char **argv) | ||
30 | { | ||
31 | int rec_argc, i = 0, j; | ||
32 | const char **rec_argv; | ||
33 | char event[64]; | ||
34 | int ret; | ||
35 | |||
36 | rec_argc = argc + 4; | ||
37 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | ||
38 | if (!rec_argv) | ||
39 | return -1; | ||
40 | |||
41 | rec_argv[i++] = strdup("record"); | ||
42 | if (!strcmp(mem_operation, MEM_OPERATION_LOAD)) | ||
43 | rec_argv[i++] = strdup("-W"); | ||
44 | rec_argv[i++] = strdup("-d"); | ||
45 | rec_argv[i++] = strdup("-e"); | ||
46 | |||
47 | if (strcmp(mem_operation, MEM_OPERATION_LOAD)) | ||
48 | sprintf(event, "cpu/mem-stores/pp"); | ||
49 | else | ||
50 | sprintf(event, "cpu/mem-loads/pp"); | ||
51 | |||
52 | rec_argv[i++] = strdup(event); | ||
53 | for (j = 1; j < argc; j++, i++) | ||
54 | rec_argv[i] = argv[j]; | ||
55 | |||
56 | ret = cmd_record(i, rec_argv, NULL); | ||
57 | free(rec_argv); | ||
58 | return ret; | ||
59 | } | ||
60 | |||
61 | static int | ||
62 | dump_raw_samples(struct perf_tool *tool, | ||
63 | union perf_event *event, | ||
64 | struct perf_sample *sample, | ||
65 | struct perf_evsel *evsel __maybe_unused, | ||
66 | struct machine *machine) | ||
67 | { | ||
68 | struct perf_mem *mem = container_of(tool, struct perf_mem, tool); | ||
69 | struct addr_location al; | ||
70 | const char *fmt; | ||
71 | |||
72 | if (perf_event__preprocess_sample(event, machine, &al, sample, | ||
73 | mem->annotate_init) < 0) { | ||
74 | fprintf(stderr, "problem processing %d event, skipping it.\n", | ||
75 | event->header.type); | ||
76 | return -1; | ||
77 | } | ||
78 | |||
79 | if (al.filtered || (mem->hide_unresolved && al.sym == NULL)) | ||
80 | return 0; | ||
81 | |||
82 | if (al.map != NULL) | ||
83 | al.map->dso->hit = 1; | ||
84 | |||
85 | if (symbol_conf.field_sep) { | ||
86 | fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64 | ||
87 | "%s0x%"PRIx64"%s%s:%s\n"; | ||
88 | } else { | ||
89 | fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 | ||
90 | "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; | ||
91 | symbol_conf.field_sep = " "; | ||
92 | } | ||
93 | |||
94 | printf(fmt, | ||
95 | sample->pid, | ||
96 | symbol_conf.field_sep, | ||
97 | sample->tid, | ||
98 | symbol_conf.field_sep, | ||
99 | event->ip.ip, | ||
100 | symbol_conf.field_sep, | ||
101 | sample->addr, | ||
102 | symbol_conf.field_sep, | ||
103 | sample->weight, | ||
104 | symbol_conf.field_sep, | ||
105 | sample->data_src, | ||
106 | symbol_conf.field_sep, | ||
107 | al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", | ||
108 | al.sym ? al.sym->name : "???"); | ||
109 | |||
110 | return 0; | ||
111 | } | ||
112 | |||
/*
 * Sample handler: forwards the sample to dump_raw_samples() and hands its
 * return value back unchanged.
 */
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	int rc;

	rc = dump_raw_samples(tool, event, sample, evsel, machine);
	return rc;
}
121 | |||
122 | static int report_raw_events(struct perf_mem *mem) | ||
123 | { | ||
124 | int err = -EINVAL; | ||
125 | int ret; | ||
126 | struct perf_session *session = perf_session__new(input_name, O_RDONLY, | ||
127 | 0, false, &mem->tool); | ||
128 | |||
129 | if (session == NULL) | ||
130 | return -ENOMEM; | ||
131 | |||
132 | if (mem->cpu_list) { | ||
133 | ret = perf_session__cpu_bitmap(session, mem->cpu_list, | ||
134 | mem->cpu_bitmap); | ||
135 | if (ret) | ||
136 | goto out_delete; | ||
137 | } | ||
138 | |||
139 | if (symbol__init() < 0) | ||
140 | return -1; | ||
141 | |||
142 | printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); | ||
143 | |||
144 | err = perf_session__process_events(session, &mem->tool); | ||
145 | if (err) | ||
146 | return err; | ||
147 | |||
148 | return 0; | ||
149 | |||
150 | out_delete: | ||
151 | perf_session__delete(session); | ||
152 | return err; | ||
153 | } | ||
154 | |||
155 | static int report_events(int argc, const char **argv, struct perf_mem *mem) | ||
156 | { | ||
157 | const char **rep_argv; | ||
158 | int ret, i = 0, j, rep_argc; | ||
159 | |||
160 | if (mem->dump_raw) | ||
161 | return report_raw_events(mem); | ||
162 | |||
163 | rep_argc = argc + 3; | ||
164 | rep_argv = calloc(rep_argc + 1, sizeof(char *)); | ||
165 | if (!rep_argv) | ||
166 | return -1; | ||
167 | |||
168 | rep_argv[i++] = strdup("report"); | ||
169 | rep_argv[i++] = strdup("--mem-mode"); | ||
170 | rep_argv[i++] = strdup("-n"); /* display number of samples */ | ||
171 | |||
172 | /* | ||
173 | * there is no weight (cost) associated with stores, so don't print | ||
174 | * the column | ||
175 | */ | ||
176 | if (strcmp(mem_operation, MEM_OPERATION_LOAD)) | ||
177 | rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr," | ||
178 | "dso_daddr,tlb,locked"); | ||
179 | |||
180 | for (j = 1; j < argc; j++, i++) | ||
181 | rep_argv[i] = argv[j]; | ||
182 | |||
183 | ret = cmd_report(i, rep_argv, NULL); | ||
184 | free(rep_argv); | ||
185 | return ret; | ||
186 | } | ||
187 | |||
188 | int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) | ||
189 | { | ||
190 | struct stat st; | ||
191 | struct perf_mem mem = { | ||
192 | .tool = { | ||
193 | .sample = process_sample_event, | ||
194 | .mmap = perf_event__process_mmap, | ||
195 | .comm = perf_event__process_comm, | ||
196 | .lost = perf_event__process_lost, | ||
197 | .fork = perf_event__process_fork, | ||
198 | .build_id = perf_event__process_build_id, | ||
199 | .ordered_samples = true, | ||
200 | }, | ||
201 | .input_name = "perf.data", | ||
202 | }; | ||
203 | const struct option mem_options[] = { | ||
204 | OPT_STRING('t', "type", &mem_operation, | ||
205 | "type", "memory operations(load/store)"), | ||
206 | OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw, | ||
207 | "dump raw samples in ASCII"), | ||
208 | OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved, | ||
209 | "Only display entries resolved to a symbol"), | ||
210 | OPT_STRING('i', "input", &input_name, "file", | ||
211 | "input file name"), | ||
212 | OPT_STRING('C', "cpu", &mem.cpu_list, "cpu", | ||
213 | "list of cpus to profile"), | ||
214 | OPT_STRING('x', "field-separator", &symbol_conf.field_sep, | ||
215 | "separator", | ||
216 | "separator for columns, no spaces will be added" | ||
217 | " between columns '.' is reserved."), | ||
218 | OPT_END() | ||
219 | }; | ||
220 | |||
221 | argc = parse_options(argc, argv, mem_options, mem_usage, | ||
222 | PARSE_OPT_STOP_AT_NON_OPTION); | ||
223 | |||
224 | if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation)) | ||
225 | usage_with_options(mem_usage, mem_options); | ||
226 | |||
227 | if (!mem.input_name || !strlen(mem.input_name)) { | ||
228 | if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) | ||
229 | mem.input_name = "-"; | ||
230 | else | ||
231 | mem.input_name = "perf.data"; | ||
232 | } | ||
233 | |||
234 | if (!strncmp(argv[0], "rec", 3)) | ||
235 | return __cmd_record(argc, argv); | ||
236 | else if (!strncmp(argv[0], "rep", 3)) | ||
237 | return report_events(argc, argv, &mem); | ||
238 | else | ||
239 | usage_with_options(mem_usage, mem_options); | ||
240 | |||
241 | return 0; | ||
242 | } | ||
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 08143bd854c7..b210d62907e4 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h | |||
@@ -36,6 +36,7 @@ extern int cmd_kvm(int argc, const char **argv, const char *prefix); | |||
36 | extern int cmd_test(int argc, const char **argv, const char *prefix); | 36 | extern int cmd_test(int argc, const char **argv, const char *prefix); |
37 | extern int cmd_trace(int argc, const char **argv, const char *prefix); | 37 | extern int cmd_trace(int argc, const char **argv, const char *prefix); |
38 | extern int cmd_inject(int argc, const char **argv, const char *prefix); | 38 | extern int cmd_inject(int argc, const char **argv, const char *prefix); |
39 | extern int cmd_mem(int argc, const char **argv, const char *prefix); | ||
39 | 40 | ||
40 | extern int find_scripts(char **scripts_array, char **scripts_path_array); | 41 | extern int find_scripts(char **scripts_array, char **scripts_path_array); |
41 | #endif | 42 | #endif |
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index a28e31be6cb4..0906fc401c52 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt | |||
@@ -14,6 +14,7 @@ perf-kmem mainporcelain common | |||
14 | perf-kvm mainporcelain common | 14 | perf-kvm mainporcelain common |
15 | perf-list mainporcelain common | 15 | perf-list mainporcelain common |
16 | perf-lock mainporcelain common | 16 | perf-lock mainporcelain common |
17 | perf-mem mainporcelain common | ||
17 | perf-probe mainporcelain full | 18 | perf-probe mainporcelain full |
18 | perf-record mainporcelain common | 19 | perf-record mainporcelain common |
19 | perf-report mainporcelain common | 20 | perf-report mainporcelain common |
diff --git a/tools/perf/perf.c b/tools/perf/perf.c index f6ba7b73f40e..31c9380cfa64 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c | |||
@@ -60,6 +60,7 @@ static struct cmd_struct commands[] = { | |||
60 | { "trace", cmd_trace, 0 }, | 60 | { "trace", cmd_trace, 0 }, |
61 | #endif | 61 | #endif |
62 | { "inject", cmd_inject, 0 }, | 62 | { "inject", cmd_inject, 0 }, |
63 | { "mem", cmd_mem, 0 }, | ||
63 | }; | 64 | }; |
64 | 65 | ||
65 | struct pager_config { | 66 | struct pager_config { |
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 99cc719ce736..6b32721f829a 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -520,6 +520,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) | |||
520 | void hist_entry__free(struct hist_entry *he) | 520 | void hist_entry__free(struct hist_entry *he) |
521 | { | 521 | { |
522 | free(he->branch_info); | 522 | free(he->branch_info); |
523 | free(he->mem_info); | ||
523 | free(he); | 524 | free(he); |
524 | } | 525 | } |
525 | 526 | ||