diff options
author | Stephane Eranian <eranian@google.com> | 2014-12-17 10:23:55 -0500 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2015-01-21 11:24:31 -0500 |
commit | 67121f85e464d66596f99afd8d188c1ae892f8fb (patch) | |
tree | 06dff15237cd0fa772f2c2e240e131058395fda4 /tools/perf | |
parent | 25dd9171f51c482eb7c4dc8618766ae733756e2d (diff) |
perf mem: Enable sampling loads and stores simultaneously
This patch modifies perf mem to default to sampling loads and stores
simultaneously. Previously it could only do one or the other, even though there was
no hardware restriction preventing simultaneous collection. With this
patch, one run is sufficient to collect both.
It is still possible to sample only loads or stores by using the
-t option:
$ perf mem -t load rec
$ perf mem -t load rep
Or
$ perf mem -t store rec
$ perf mem -t store rep
The perf report TUI will show one event at a time. The store output will
contain a Weight column which will be empty.
In V2, we updated the man pages to reflect the change and also simplified
the initialization of the argv vector passed to the cmd_*() functions as
per LKML feedback.
In V3, we fixed typos in the changelog.
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Fowles <rfowles@redhat.com>
Link: http://lkml.kernel.org/r/20141217152355.GA10053@thinkpad
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/Documentation/perf-mem.txt | 9 | ||||
-rw-r--r-- | tools/perf/builtin-mem.c | 123 |
2 files changed, 105 insertions, 27 deletions
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 1d78a4064da4..43310d8661fe 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt | |||
@@ -12,11 +12,12 @@ SYNOPSIS | |||
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | "perf mem -t <TYPE> record" runs a command and gathers memory operation data | 15 | "perf mem record" runs a command and gathers memory operation data |
16 | from it, into perf.data. Perf record options are accepted and are passed through. | 16 | from it, into perf.data. Perf record options are accepted and are passed through. |
17 | 17 | ||
18 | "perf mem -t <TYPE> report" displays the result. It invokes perf report with the | 18 | "perf mem report" displays the result. It invokes perf report with the |
19 | right set of options to display a memory access profile. | 19 | right set of options to display a memory access profile. By default, loads |
20 | and stores are sampled. Use the -t option to limit to loads or stores. | ||
20 | 21 | ||
21 | Note that on Intel systems the memory latency reported is the use-latency, | 22 | Note that on Intel systems the memory latency reported is the use-latency, |
22 | not the pure load (or store) latency. Use latency includes any pipeline | 23 | not the pure load (or store) latency. Use latency includes any pipeline |
@@ -29,7 +30,7 @@ OPTIONS | |||
29 | 30 | ||
30 | -t:: | 31 | -t:: |
31 | --type=:: | 32 | --type=:: |
32 | Select the memory operation type: load or store (default: load) | 33 | Select the memory operation type: load or store (default: load,store) |
33 | 34 | ||
34 | -D:: | 35 | -D:: |
35 | --dump-raw-samples=:: | 36 | --dump-raw-samples=:: |
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 24db6ffe2957..1eded0a3a509 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c | |||
@@ -7,10 +7,13 @@ | |||
7 | #include "util/session.h" | 7 | #include "util/session.h" |
8 | #include "util/data.h" | 8 | #include "util/data.h" |
9 | 9 | ||
10 | #define MEM_OPERATION_LOAD "load" | 10 | #define MEM_OPERATION_LOAD 0x1 |
11 | #define MEM_OPERATION_STORE "store" | 11 | #define MEM_OPERATION_STORE 0x2 |
12 | 12 | ||
13 | static const char *mem_operation = MEM_OPERATION_LOAD; | 13 | /* |
14 | * default to both load and store sampling | |
15 | */ | ||
16 | static int mem_operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE; | ||
14 | 17 | ||
15 | struct perf_mem { | 18 | struct perf_mem { |
16 | struct perf_tool tool; | 19 | struct perf_tool tool; |
@@ -25,26 +28,30 @@ static int __cmd_record(int argc, const char **argv) | |||
25 | { | 28 | { |
26 | int rec_argc, i = 0, j; | 29 | int rec_argc, i = 0, j; |
27 | const char **rec_argv; | 30 | const char **rec_argv; |
28 | char event[64]; | ||
29 | int ret; | 31 | int ret; |
30 | 32 | ||
31 | rec_argc = argc + 4; | 33 | rec_argc = argc + 7; /* max number of arguments */ |
32 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | 34 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); |
33 | if (!rec_argv) | 35 | if (!rec_argv) |
34 | return -1; | 36 | return -1; |
35 | 37 | ||
36 | rec_argv[i++] = strdup("record"); | 38 | rec_argv[i++] = "record"; |
37 | if (!strcmp(mem_operation, MEM_OPERATION_LOAD)) | ||
38 | rec_argv[i++] = strdup("-W"); | ||
39 | rec_argv[i++] = strdup("-d"); | ||
40 | rec_argv[i++] = strdup("-e"); | ||
41 | 39 | ||
42 | if (strcmp(mem_operation, MEM_OPERATION_LOAD)) | 40 | if (mem_operation & MEM_OPERATION_LOAD) |
43 | sprintf(event, "cpu/mem-stores/pp"); | 41 | rec_argv[i++] = "-W"; |
44 | else | 42 | |
45 | sprintf(event, "cpu/mem-loads/pp"); | 43 | rec_argv[i++] = "-d"; |
44 | |||
45 | if (mem_operation & MEM_OPERATION_LOAD) { | ||
46 | rec_argv[i++] = "-e"; | ||
47 | rec_argv[i++] = "cpu/mem-loads/pp"; | ||
48 | } | ||
49 | |||
50 | if (mem_operation & MEM_OPERATION_STORE) { | ||
51 | rec_argv[i++] = "-e"; | ||
52 | rec_argv[i++] = "cpu/mem-stores/pp"; | ||
53 | } | ||
46 | 54 | ||
47 | rec_argv[i++] = strdup(event); | ||
48 | for (j = 1; j < argc; j++, i++) | 55 | for (j = 1; j < argc; j++, i++) |
49 | rec_argv[i] = argv[j]; | 56 | rec_argv[i] = argv[j]; |
50 | 57 | ||
@@ -162,17 +169,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) | |||
162 | if (!rep_argv) | 169 | if (!rep_argv) |
163 | return -1; | 170 | return -1; |
164 | 171 | ||
165 | rep_argv[i++] = strdup("report"); | 172 | rep_argv[i++] = "report"; |
166 | rep_argv[i++] = strdup("--mem-mode"); | 173 | rep_argv[i++] = "--mem-mode"; |
167 | rep_argv[i++] = strdup("-n"); /* display number of samples */ | 174 | rep_argv[i++] = "-n"; /* display number of samples */ |
168 | 175 | ||
169 | /* | 176 | /* |
170 | * there is no weight (cost) associated with stores, so don't print | 177 | * there is no weight (cost) associated with stores, so don't print |
171 | * the column | 178 | * the column |
172 | */ | 179 | */ |
173 | if (strcmp(mem_operation, MEM_OPERATION_LOAD)) | 180 | if (!(mem_operation & MEM_OPERATION_LOAD)) |
174 | rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr," | 181 | rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," |
175 | "dso_daddr,tlb,locked"); | 182 | "dso_daddr,tlb,locked"; |
176 | 183 | ||
177 | for (j = 1; j < argc; j++, i++) | 184 | for (j = 1; j < argc; j++, i++) |
178 | rep_argv[i] = argv[j]; | 185 | rep_argv[i] = argv[j]; |
@@ -182,6 +189,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) | |||
182 | return ret; | 189 | return ret; |
183 | } | 190 | } |
184 | 191 | ||
192 | struct mem_mode { | ||
193 | const char *name; | ||
194 | int mode; | ||
195 | }; | ||
196 | |||
197 | #define MEM_OPT(n, m) \ | ||
198 | { .name = n, .mode = (m) } | ||
199 | |||
200 | #define MEM_END { .name = NULL } | ||
201 | |||
202 | static const struct mem_mode mem_modes[]={ | ||
203 | MEM_OPT("load", MEM_OPERATION_LOAD), | ||
204 | MEM_OPT("store", MEM_OPERATION_STORE), | ||
205 | MEM_END | ||
206 | }; | ||
207 | |||
208 | static int | ||
209 | parse_mem_ops(const struct option *opt, const char *str, int unset) | ||
210 | { | ||
211 | int *mode = (int *)opt->value; | ||
212 | const struct mem_mode *m; | ||
213 | char *s, *os = NULL, *p; | ||
214 | int ret = -1; | ||
215 | |||
216 | if (unset) | ||
217 | return 0; | ||
218 | |||
219 | /* str may be NULL in case no arg is passed to -t */ | ||
220 | if (str) { | ||
221 | /* because str is read-only */ | ||
222 | s = os = strdup(str); | ||
223 | if (!s) | ||
224 | return -1; | ||
225 | |||
226 | /* reset mode */ | ||
227 | *mode = 0; | ||
228 | |||
229 | for (;;) { | ||
230 | p = strchr(s, ','); | ||
231 | if (p) | ||
232 | *p = '\0'; | ||
233 | |||
234 | for (m = mem_modes; m->name; m++) { | ||
235 | if (!strcasecmp(s, m->name)) | ||
236 | break; | ||
237 | } | ||
238 | if (!m->name) { | ||
239 | fprintf(stderr, "unknown sampling op %s," | ||
240 | " check man page\n", s); | ||
241 | goto error; | ||
242 | } | ||
243 | |||
244 | *mode |= m->mode; | ||
245 | |||
246 | if (!p) | ||
247 | break; | ||
248 | |||
249 | s = p + 1; | ||
250 | } | ||
251 | } | ||
252 | ret = 0; | ||
253 | |||
254 | if (*mode == 0) | ||
255 | *mode = MEM_OPERATION_LOAD; | ||
256 | error: | ||
257 | free(os); | ||
258 | return ret; | ||
259 | } | ||
260 | |||
185 | int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) | 261 | int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) |
186 | { | 262 | { |
187 | struct stat st; | 263 | struct stat st; |
@@ -199,8 +275,9 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) | |||
199 | .input_name = "perf.data", | 275 | .input_name = "perf.data", |
200 | }; | 276 | }; |
201 | const struct option mem_options[] = { | 277 | const struct option mem_options[] = { |
202 | OPT_STRING('t', "type", &mem_operation, | 278 | OPT_CALLBACK('t', "type", &mem_operation, |
203 | "type", "memory operations(load/store)"), | 279 | "type", "memory operations(load,store) Default load,store", |
280 | parse_mem_ops), | ||
204 | OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw, | 281 | OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw, |
205 | "dump raw samples in ASCII"), | 282 | "dump raw samples in ASCII"), |
206 | OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved, | 283 | OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved, |