diff options
| author | Jiri Olsa <jolsa@kernel.org> | 2016-04-12 09:29:26 -0400 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2016-04-13 09:11:51 -0400 |
| commit | 99623c628f5425f09b5321cf621af1da29c0c47d (patch) | |
| tree | 504170bab205d25e66b6b7d88ab4f0b38d65d184 | |
| parent | e632aa69c919462a7f93c8799b97c8a9ddd48fc2 (diff) | |
perf sched: Add compact display option
Add a compact map display that does not output the whole CPU matrix, only
the CPUs that got an event.
$ perf sched map --compact
*A0 1082427.094098 secs A0 => perf:19404 (CPU 2)
A0 *. 1082427.094127 secs . => swapper:0 (CPU 1)
A0 . *B0 1082427.094174 secs B0 => rcuos/2:25 (CPU 3)
A0 . *. 1082427.094177 secs
*C0 . . 1082427.094187 secs C0 => migration/2:21
C0 *A0 . 1082427.094193 secs
*. A0 . 1082427.094195 secs
*D0 A0 . 1082427.094402 secs D0 => rngd:968
*. A0 . 1082427.094406 secs
. *E0 . 1082427.095221 secs E0 => kworker/1:1:5333
. E0 *F0 1082427.095227 secs F0 => xterm:3342
It helps produce sane output for small thread loads on servers with many
CPUs.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1460467771-26532-4-git-send-email-jolsa@kernel.org
[ Add entry in 'perf sched' man page ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
| -rw-r--r-- | tools/perf/Documentation/perf-sched.txt | 7 | ||||
| -rw-r--r-- | tools/perf/builtin-sched.c | 62 |
2 files changed, 63 insertions, 6 deletions
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 8ff4df956951..89b0c5b7fe84 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt | |||
| @@ -50,6 +50,13 @@ OPTIONS | |||
| 50 | --dump-raw-trace=:: | 50 | --dump-raw-trace=:: |
| 51 | Display verbose dump of the sched data. | 51 | Display verbose dump of the sched data. |
| 52 | 52 | ||
| 53 | OPTIONS for 'perf sched map' | ||
| 54 | ---------------------------- | ||
| 55 | |||
| 56 | --compact:: | ||
| 57 | Show only CPUs with activity. Helps visualizing on high core | ||
| 58 | count systems. | ||
| 59 | |||
| 53 | SEE ALSO | 60 | SEE ALSO |
| 54 | -------- | 61 | -------- |
| 55 | linkperf:perf-record[1] | 62 | linkperf:perf-record[1] |
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 871b55ae22a4..64dd94667055 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c | |||
| @@ -122,6 +122,12 @@ struct trace_sched_handler { | |||
| 122 | struct machine *machine); | 122 | struct machine *machine); |
| 123 | }; | 123 | }; |
| 124 | 124 | ||
| 125 | struct perf_sched_map { | ||
| 126 | DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); | ||
| 127 | int *comp_cpus; | ||
| 128 | bool comp; | ||
| 129 | }; | ||
| 130 | |||
| 125 | struct perf_sched { | 131 | struct perf_sched { |
| 126 | struct perf_tool tool; | 132 | struct perf_tool tool; |
| 127 | const char *sort_order; | 133 | const char *sort_order; |
| @@ -173,6 +179,7 @@ struct perf_sched { | |||
| 173 | struct list_head sort_list, cmp_pid; | 179 | struct list_head sort_list, cmp_pid; |
| 174 | bool force; | 180 | bool force; |
| 175 | bool skip_merge; | 181 | bool skip_merge; |
| 182 | struct perf_sched_map map; | ||
| 176 | }; | 183 | }; |
| 177 | 184 | ||
| 178 | static u64 get_nsecs(void) | 185 | static u64 get_nsecs(void) |
| @@ -1347,13 +1354,24 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, | |||
| 1347 | int new_shortname; | 1354 | int new_shortname; |
| 1348 | u64 timestamp0, timestamp = sample->time; | 1355 | u64 timestamp0, timestamp = sample->time; |
| 1349 | s64 delta; | 1356 | s64 delta; |
| 1350 | int cpu, this_cpu = sample->cpu; | 1357 | int i, this_cpu = sample->cpu; |
| 1358 | int cpus_nr; | ||
| 1359 | bool new_cpu = false; | ||
| 1351 | 1360 | ||
| 1352 | BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); | 1361 | BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); |
| 1353 | 1362 | ||
| 1354 | if (this_cpu > sched->max_cpu) | 1363 | if (this_cpu > sched->max_cpu) |
| 1355 | sched->max_cpu = this_cpu; | 1364 | sched->max_cpu = this_cpu; |
| 1356 | 1365 | ||
| 1366 | if (sched->map.comp) { | ||
| 1367 | cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); | ||
| 1368 | if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) { | ||
| 1369 | sched->map.comp_cpus[cpus_nr++] = this_cpu; | ||
| 1370 | new_cpu = true; | ||
| 1371 | } | ||
| 1372 | } else | ||
| 1373 | cpus_nr = sched->max_cpu; | ||
| 1374 | |||
| 1357 | timestamp0 = sched->cpu_last_switched[this_cpu]; | 1375 | timestamp0 = sched->cpu_last_switched[this_cpu]; |
| 1358 | sched->cpu_last_switched[this_cpu] = timestamp; | 1376 | sched->cpu_last_switched[this_cpu] = timestamp; |
| 1359 | if (timestamp0) | 1377 | if (timestamp0) |
| @@ -1400,7 +1418,9 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, | |||
| 1400 | new_shortname = 1; | 1418 | new_shortname = 1; |
| 1401 | } | 1419 | } |
| 1402 | 1420 | ||
| 1403 | for (cpu = 0; cpu <= sched->max_cpu; cpu++) { | 1421 | for (i = 0; i < cpus_nr; i++) { |
| 1422 | int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i; | ||
| 1423 | |||
| 1404 | if (cpu != this_cpu) | 1424 | if (cpu != this_cpu) |
| 1405 | printf(" "); | 1425 | printf(" "); |
| 1406 | else | 1426 | else |
| @@ -1414,12 +1434,15 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, | |||
| 1414 | 1434 | ||
| 1415 | printf(" %12.6f secs ", (double)timestamp/1e9); | 1435 | printf(" %12.6f secs ", (double)timestamp/1e9); |
| 1416 | if (new_shortname) { | 1436 | if (new_shortname) { |
| 1417 | printf("%s => %s:%d\n", | 1437 | printf("%s => %s:%d", |
| 1418 | sched_in->shortname, thread__comm_str(sched_in), sched_in->tid); | 1438 | sched_in->shortname, thread__comm_str(sched_in), sched_in->tid); |
| 1419 | } else { | ||
| 1420 | printf("\n"); | ||
| 1421 | } | 1439 | } |
| 1422 | 1440 | ||
| 1441 | if (sched->map.comp && new_cpu) | ||
| 1442 | printf(" (CPU %d)", this_cpu); | ||
| 1443 | |||
| 1444 | printf("\n"); | ||
| 1445 | |||
| 1423 | thread__put(sched_in); | 1446 | thread__put(sched_in); |
| 1424 | 1447 | ||
| 1425 | return 0; | 1448 | return 0; |
| @@ -1675,9 +1698,22 @@ static int perf_sched__lat(struct perf_sched *sched) | |||
| 1675 | return 0; | 1698 | return 0; |
| 1676 | } | 1699 | } |
| 1677 | 1700 | ||
| 1701 | static int setup_map_cpus(struct perf_sched *sched) | ||
| 1702 | { | ||
| 1703 | sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); | ||
| 1704 | |||
| 1705 | if (sched->map.comp) { | ||
| 1706 | sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int)); | ||
| 1707 | return sched->map.comp_cpus ? 0 : -1; | ||
| 1708 | } | ||
| 1709 | |||
| 1710 | return 0; | ||
| 1711 | } | ||
| 1712 | |||
| 1678 | static int perf_sched__map(struct perf_sched *sched) | 1713 | static int perf_sched__map(struct perf_sched *sched) |
| 1679 | { | 1714 | { |
| 1680 | sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); | 1715 | if (setup_map_cpus(sched)) |
| 1716 | return -1; | ||
| 1681 | 1717 | ||
| 1682 | setup_pager(); | 1718 | setup_pager(); |
| 1683 | if (perf_sched__read_events(sched)) | 1719 | if (perf_sched__read_events(sched)) |
| @@ -1831,6 +1867,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 1831 | "dump raw trace in ASCII"), | 1867 | "dump raw trace in ASCII"), |
| 1832 | OPT_END() | 1868 | OPT_END() |
| 1833 | }; | 1869 | }; |
| 1870 | const struct option map_options[] = { | ||
| 1871 | OPT_BOOLEAN(0, "compact", &sched.map.comp, | ||
| 1872 | "map output in compact mode"), | ||
| 1873 | OPT_END() | ||
| 1874 | }; | ||
| 1834 | const char * const latency_usage[] = { | 1875 | const char * const latency_usage[] = { |
| 1835 | "perf sched latency [<options>]", | 1876 | "perf sched latency [<options>]", |
| 1836 | NULL | 1877 | NULL |
| @@ -1839,6 +1880,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 1839 | "perf sched replay [<options>]", | 1880 | "perf sched replay [<options>]", |
| 1840 | NULL | 1881 | NULL |
| 1841 | }; | 1882 | }; |
| 1883 | const char * const map_usage[] = { | ||
| 1884 | "perf sched map [<options>]", | ||
| 1885 | NULL | ||
| 1886 | }; | ||
| 1842 | const char *const sched_subcommands[] = { "record", "latency", "map", | 1887 | const char *const sched_subcommands[] = { "record", "latency", "map", |
| 1843 | "replay", "script", NULL }; | 1888 | "replay", "script", NULL }; |
| 1844 | const char *sched_usage[] = { | 1889 | const char *sched_usage[] = { |
| @@ -1887,6 +1932,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 1887 | setup_sorting(&sched, latency_options, latency_usage); | 1932 | setup_sorting(&sched, latency_options, latency_usage); |
| 1888 | return perf_sched__lat(&sched); | 1933 | return perf_sched__lat(&sched); |
| 1889 | } else if (!strcmp(argv[0], "map")) { | 1934 | } else if (!strcmp(argv[0], "map")) { |
| 1935 | if (argc) { | ||
| 1936 | argc = parse_options(argc, argv, map_options, replay_usage, 0); | ||
| 1937 | if (argc) | ||
| 1938 | usage_with_options(map_usage, map_options); | ||
| 1939 | } | ||
| 1890 | sched.tp_handler = &map_ops; | 1940 | sched.tp_handler = &map_ops; |
| 1891 | setup_sorting(&sched, latency_options, latency_usage); | 1941 | setup_sorting(&sched, latency_options, latency_usage); |
| 1892 | return perf_sched__map(&sched); | 1942 | return perf_sched__map(&sched); |
