aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Anton Blanchard <anton@samba.org> 2011-07-04 07:57:50 -0400
committer Ingo Molnar <mingo@elte.hu> 2011-07-05 04:44:44 -0400
commit 5d67be97f8903d05ce53597fb5f3bc25a45e8026 (patch)
tree 5ce8c2407d50570d7fee79b9c7f24e8eb89384b4
parent 9f8b6a6cf0ee78de87ebe1e87f54bec1c1741ef7 (diff)
perf report/annotate/script: Add option to specify a CPU range
Add an option to perf report/annotate/script to specify which CPUs to operate on. This enables us to take a single system wide profile and analyse each CPU (or group of CPUs) in isolation.

This was useful when profiling a multiprocess workload where the bottleneck was on one CPU but this was hidden in the overall profile. Per process and per thread breakdowns didn't help because multiple processes were running on each CPU and no single process consumed an entire CPU.

The patch converts the list of CPUs returned by cpu_map__new into a bitmap for fast lookup. I wanted to use -C to be consistent with perf top/record/stat, but unfortunately perf report already uses -C <comms>.

v2: Incorporate suggestions from David Ahern:
 - Added -c to perf script
 - Check that SAMPLE_CPU is set when -c is used
 - Update documentation

v3: Create perf_session__cpu_bitmap()

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Link: http://lkml.kernel.org/r/20110704215750.11647eb9@kryten
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- tools/perf/Documentation/perf-annotate.txt 6
-rw-r--r-- tools/perf/Documentation/perf-report.txt 6
-rw-r--r-- tools/perf/Documentation/perf-script.txt 6
-rw-r--r-- tools/perf/builtin-annotate.c 15
-rw-r--r-- tools/perf/builtin-report.c 15
-rw-r--r-- tools/perf/builtin-script.c 13
-rw-r--r-- tools/perf/util/session.c 38
-rw-r--r-- tools/perf/util/session.h 3
8 files changed, 102 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 6f5a498608b2..85c5f026930d 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -66,6 +66,12 @@ OPTIONS
66 used. This interfaces starts by centering on the line with more 66 used. This interfaces starts by centering on the line with more
67 samples, TAB/UNTAB cycles through the lines with more samples. 67 samples, TAB/UNTAB cycles through the lines with more samples.
68 68
69-c::
70--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
71 be provided as a comma-separated list with no space: 0,1. Ranges of
72 CPUs are specified with -: 0-2. Default is to report samples on all
73 CPUs.
74
69SEE ALSO 75SEE ALSO
70-------- 76--------
71linkperf:perf-record[1], linkperf:perf-report[1] 77linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index cfa8e513d0fb..04253c07d19a 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -128,6 +128,12 @@ OPTIONS
128--symfs=<directory>:: 128--symfs=<directory>::
129 Look for files with symbols relative to this directory. 129 Look for files with symbols relative to this directory.
130 130
131-c::
132--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
133 be provided as a comma-separated list with no space: 0,1. Ranges of
134 CPUs are specified with -: 0-2. Default is to report samples on all
135 CPUs.
136
131SEE ALSO 137SEE ALSO
132-------- 138--------
133linkperf:perf-stat[1] 139linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index c6068cb43f57..db017867d9e8 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -182,6 +182,12 @@ OPTIONS
182--hide-call-graph:: 182--hide-call-graph::
183 When printing symbols do not display call chain. 183 When printing symbols do not display call chain.
184 184
185-c::
186--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
187 be provided as a comma-separated list with no space: 0,1. Ranges of
188 CPUs are specified with -: 0-2. Default is to report samples on all
189 CPUs.
190
185SEE ALSO 191SEE ALSO
186-------- 192--------
187linkperf:perf-record[1], linkperf:perf-script-perl[1], 193linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 7b139e1e7e86..555aefd7fe01 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -28,6 +28,8 @@
28#include "util/hist.h" 28#include "util/hist.h"
29#include "util/session.h" 29#include "util/session.h"
30 30
31#include <linux/bitmap.h>
32
31static char const *input_name = "perf.data"; 33static char const *input_name = "perf.data";
32 34
33static bool force, use_tui, use_stdio; 35static bool force, use_tui, use_stdio;
@@ -38,6 +40,9 @@ static bool print_line;
38 40
39static const char *sym_hist_filter; 41static const char *sym_hist_filter;
40 42
43static const char *cpu_list;
44static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
45
41static int perf_evlist__add_sample(struct perf_evlist *evlist, 46static int perf_evlist__add_sample(struct perf_evlist *evlist,
42 struct perf_sample *sample, 47 struct perf_sample *sample,
43 struct perf_evsel *evsel, 48 struct perf_evsel *evsel,
@@ -90,6 +95,9 @@ static int process_sample_event(union perf_event *event,
90 return -1; 95 return -1;
91 } 96 }
92 97
98 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
99 return 0;
100
93 if (!al.filtered && 101 if (!al.filtered &&
94 perf_evlist__add_sample(session->evlist, sample, evsel, &al)) { 102 perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
95 pr_warning("problem incrementing symbol count, " 103 pr_warning("problem incrementing symbol count, "
@@ -177,6 +185,12 @@ static int __cmd_annotate(void)
177 if (session == NULL) 185 if (session == NULL)
178 return -ENOMEM; 186 return -ENOMEM;
179 187
188 if (cpu_list) {
189 ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
190 if (ret)
191 goto out_delete;
192 }
193
180 ret = perf_session__process_events(session, &event_ops); 194 ret = perf_session__process_events(session, &event_ops);
181 if (ret) 195 if (ret)
182 goto out_delete; 196 goto out_delete;
@@ -252,6 +266,7 @@ static const struct option options[] = {
252 "print matching source lines (may be slow)"), 266 "print matching source lines (may be slow)"),
253 OPT_BOOLEAN('P', "full-paths", &full_paths, 267 OPT_BOOLEAN('P', "full-paths", &full_paths,
254 "Don't shorten the displayed pathnames"), 268 "Don't shorten the displayed pathnames"),
269 OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
255 OPT_END() 270 OPT_END()
256}; 271};
257 272
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5d43d0181d63..f854efda7686 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -33,6 +33,8 @@
33#include "util/sort.h" 33#include "util/sort.h"
34#include "util/hist.h" 34#include "util/hist.h"
35 35
36#include <linux/bitmap.h>
37
36static char const *input_name = "perf.data"; 38static char const *input_name = "perf.data";
37 39
38static bool force, use_tui, use_stdio; 40static bool force, use_tui, use_stdio;
@@ -49,6 +51,9 @@ static char callchain_default_opt[] = "fractal,0.5,callee";
49static bool inverted_callchain; 51static bool inverted_callchain;
50static symbol_filter_t annotate_init; 52static symbol_filter_t annotate_init;
51 53
54static const char *cpu_list;
55static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
56
52static int perf_session__add_hist_entry(struct perf_session *session, 57static int perf_session__add_hist_entry(struct perf_session *session,
53 struct addr_location *al, 58 struct addr_location *al,
54 struct perf_sample *sample, 59 struct perf_sample *sample,
@@ -117,6 +122,9 @@ static int process_sample_event(union perf_event *event,
117 if (al.filtered || (hide_unresolved && al.sym == NULL)) 122 if (al.filtered || (hide_unresolved && al.sym == NULL))
118 return 0; 123 return 0;
119 124
125 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
126 return 0;
127
120 if (al.map != NULL) 128 if (al.map != NULL)
121 al.map->dso->hit = 1; 129 al.map->dso->hit = 1;
122 130
@@ -263,6 +271,12 @@ static int __cmd_report(void)
263 if (session == NULL) 271 if (session == NULL)
264 return -ENOMEM; 272 return -ENOMEM;
265 273
274 if (cpu_list) {
275 ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
276 if (ret)
277 goto out_delete;
278 }
279
266 if (show_threads) 280 if (show_threads)
267 perf_read_values_init(&show_threads_values); 281 perf_read_values_init(&show_threads_values);
268 282
@@ -473,6 +487,7 @@ static const struct option options[] = {
473 "Only display entries resolved to a symbol"), 487 "Only display entries resolved to a symbol"),
474 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 488 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
475 "Look for files with symbols relative to this directory"), 489 "Look for files with symbols relative to this directory"),
490 OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
476 OPT_END() 491 OPT_END()
477}; 492};
478 493
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3056b45b3dd6..09024ec2ab2e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -13,6 +13,7 @@
13#include "util/util.h" 13#include "util/util.h"
14#include "util/evlist.h" 14#include "util/evlist.h"
15#include "util/evsel.h" 15#include "util/evsel.h"
16#include <linux/bitmap.h>
16 17
17static char const *script_name; 18static char const *script_name;
18static char const *generate_script_lang; 19static char const *generate_script_lang;
@@ -21,6 +22,8 @@ static u64 last_timestamp;
21static u64 nr_unordered; 22static u64 nr_unordered;
22extern const struct option record_options[]; 23extern const struct option record_options[];
23static bool no_callchain; 24static bool no_callchain;
25static const char *cpu_list;
26static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
24 27
25enum perf_output_field { 28enum perf_output_field {
26 PERF_OUTPUT_COMM = 1U << 0, 29 PERF_OUTPUT_COMM = 1U << 0,
@@ -453,6 +456,10 @@ static int process_sample_event(union perf_event *event,
453 last_timestamp = sample->time; 456 last_timestamp = sample->time;
454 return 0; 457 return 0;
455 } 458 }
459
460 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
461 return 0;
462
456 scripting_ops->process_event(event, sample, evsel, session, thread); 463 scripting_ops->process_event(event, sample, evsel, session, thread);
457 464
458 session->hists.stats.total_period += sample->period; 465 session->hists.stats.total_period += sample->period;
@@ -1075,6 +1082,7 @@ static const struct option options[] = {
1075 OPT_CALLBACK('f', "fields", NULL, "str", 1082 OPT_CALLBACK('f', "fields", NULL, "str",
1076 "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", 1083 "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
1077 parse_output_fields), 1084 parse_output_fields),
1085 OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
1078 1086
1079 OPT_END() 1087 OPT_END()
1080}; 1088};
@@ -1255,6 +1263,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
1255 if (session == NULL) 1263 if (session == NULL)
1256 return -ENOMEM; 1264 return -ENOMEM;
1257 1265
1266 if (cpu_list) {
1267 if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap))
1268 return -1;
1269 }
1270
1258 if (!no_callchain) 1271 if (!no_callchain)
1259 symbol_conf.use_callchain = true; 1272 symbol_conf.use_callchain = true;
1260 else 1273 else
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 558bcf996949..080e5336d89f 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -12,6 +12,7 @@
12#include "session.h" 12#include "session.h"
13#include "sort.h" 13#include "sort.h"
14#include "util.h" 14#include "util.h"
15#include "cpumap.h"
15 16
16static int perf_session__open(struct perf_session *self, bool force) 17static int perf_session__open(struct perf_session *self, bool force)
17{ 18{
@@ -1282,3 +1283,40 @@ void perf_session__print_ip(union perf_event *event,
1282 } 1283 }
1283 } 1284 }
1284} 1285}
1286
1287int perf_session__cpu_bitmap(struct perf_session *session,
1288 const char *cpu_list, unsigned long *cpu_bitmap)
1289{
1290 int i;
1291 struct cpu_map *map;
1292
1293 for (i = 0; i < PERF_TYPE_MAX; ++i) {
1294 struct perf_evsel *evsel;
1295
1296 evsel = perf_session__find_first_evtype(session, i);
1297 if (!evsel)
1298 continue;
1299
1300 if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
1301 pr_err("File does not contain CPU events. "
1302 "Remove -c option to proceed.\n");
1303 return -1;
1304 }
1305 }
1306
1307 map = cpu_map__new(cpu_list);
1308
1309 for (i = 0; i < map->nr; i++) {
1310 int cpu = map->map[i];
1311
1312 if (cpu >= MAX_NR_CPUS) {
1313 pr_err("Requested CPU %d too large. "
1314 "Consider raising MAX_NR_CPUS\n", cpu);
1315 return -1;
1316 }
1317
1318 set_bit(cpu, cpu_bitmap);
1319 }
1320
1321 return 0;
1322}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index de4178d7bb7b..5de754f4b7f3 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -172,4 +172,7 @@ void perf_session__print_ip(union perf_event *event,
172 struct perf_session *session, 172 struct perf_session *session,
173 int print_sym, int print_dso); 173 int print_sym, int print_dso);
174 174
175int perf_session__cpu_bitmap(struct perf_session *session,
176 const char *cpu_list, unsigned long *cpu_bitmap);
177
175#endif /* __PERF_SESSION_H */ 178#endif /* __PERF_SESSION_H */