diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-06-08 03:29:23 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-06-08 03:29:23 -0400 |
commit | aa3a655b159f11b1afe0dcdac5fb5b172f02b778 (patch) | |
tree | 577058fb95c7f2e2aacb3566874e75e17fcec9f8 | |
parent | 616d1c1b98ac79f30216a57a170dd7cea19b3df3 (diff) | |
parent | 7db91f251056f90fec4121f028680ab3153a0f3c (diff) |
Merge tag 'perf-core-for-mingo-20160606' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
User visible changes:
- Tooling support for TopDown counters, recently added to the kernel (Andi Kleen)
- Show call graphs in 'perf script' when 1st event doesn't have it but some other has (He Kuang)
- Fix terminal cleanup when handling invalid .perfconfig files in 'perf top' (Taeung Song)
Build fixes:
- Respect CROSS_COMPILE for the linker in libapi (Lucas Stach)
Infrastructure changes:
- Fix perf_evlist__alloc_mmap() failure path (Wang Nan)
- Provide way to extract integer value from format_field (Arnaldo Carvalho de Melo)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | tools/lib/api/Makefile | 1 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 32 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/Build | 1 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/group.c | 27 | ||||
-rw-r--r-- | tools/perf/builtin-script.c | 23 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 165 | ||||
-rw-r--r-- | tools/perf/tests/parse-events.c | 4 | ||||
-rw-r--r-- | tools/perf/util/config.c | 22 | ||||
-rw-r--r-- | tools/perf/util/evlist.c | 5 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 25 | ||||
-rw-r--r-- | tools/perf/util/evsel.h | 2 | ||||
-rw-r--r-- | tools/perf/util/group.h | 7 | ||||
-rw-r--r-- | tools/perf/util/parse-events.l | 1 | ||||
-rw-r--r-- | tools/perf/util/stat-shadow.c | 162 | ||||
-rw-r--r-- | tools/perf/util/stat.c | 5 | ||||
-rw-r--r-- | tools/perf/util/stat.h | 5 |
16 files changed, 441 insertions, 46 deletions
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 316f308a63ea..67ff93ec1515 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile | |||
@@ -10,6 +10,7 @@ endif | |||
10 | 10 | ||
11 | CC = $(CROSS_COMPILE)gcc | 11 | CC = $(CROSS_COMPILE)gcc |
12 | AR = $(CROSS_COMPILE)ar | 12 | AR = $(CROSS_COMPILE)ar |
13 | LD = $(CROSS_COMPILE)ld | ||
13 | 14 | ||
14 | MAKEFLAGS += --no-print-directory | 15 | MAKEFLAGS += --no-print-directory |
15 | 16 | ||
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 04f23b404bbc..d96ccd4844df 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -204,6 +204,38 @@ Aggregate counts per physical processor for system-wide mode measurements. | |||
204 | --no-aggr:: | 204 | --no-aggr:: |
205 | Do not aggregate counts across all monitored CPUs. | 205 | Do not aggregate counts across all monitored CPUs. |
206 | 206 | ||
207 | --topdown:: | ||
208 | Print top down level 1 metrics if supported by the CPU. This allows to | ||
210 | determine bottlenecks in the CPU pipeline for CPU-bound workloads, | ||
210 | by breaking the cycles consumed down into frontend bound, backend bound, | ||
211 | bad speculation and retiring. | ||
212 | |||
213 | Frontend bound means that the CPU cannot fetch and decode instructions fast | ||
214 | enough. Backend bound means that computation or memory access is the | ||
215 | bottleneck. Bad Speculation means that the CPU wasted cycles due to branch | ||
216 | mispredictions and similar issues. Retiring means that the CPU computed without | ||
217 | an apparent bottleneck. The bottleneck is only the real bottleneck | ||
218 | if the workload is actually bound by the CPU and not by something else. | ||
219 | |||
220 | For best results it is usually a good idea to use it with interval | ||
221 | mode like -I 1000, as the bottleneck of workloads can change often. | ||
222 | |||
223 | The top down metrics are collected per core instead of per | ||
224 | CPU thread. Per core mode is automatically enabled | ||
225 | and -a (global monitoring) is needed, requiring root rights or | ||
226 | perf.perf_event_paranoid=-1. | ||
227 | |||
228 | Topdown uses the full Performance Monitoring Unit, and needs | ||
229 | disabling of the NMI watchdog (as root): | ||
230 | echo 0 > /proc/sys/kernel/nmi_watchdog | ||
231 | for best results. Otherwise the bottlenecks may be inconsistent | ||
232 | on workload with changing phases. | ||
233 | |||
234 | This enables --metric-only, unless overridden with --no-metric-only. | ||
235 | |||
236 | To interpret the results it is usually necessary to know on which | ||
237 | CPUs the workload runs. If needed, the CPUs can be forced using | ||
238 | taskset. | ||
207 | 239 | ||
208 | EXAMPLES | 240 | EXAMPLES |
209 | -------- | 241 | -------- |
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 465970370f3e..4cd8a16b1b7b 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build | |||
@@ -3,6 +3,7 @@ libperf-y += tsc.o | |||
3 | libperf-y += pmu.o | 3 | libperf-y += pmu.o |
4 | libperf-y += kvm-stat.o | 4 | libperf-y += kvm-stat.o |
5 | libperf-y += perf_regs.o | 5 | libperf-y += perf_regs.o |
6 | libperf-y += group.o | ||
6 | 7 | ||
7 | libperf-$(CONFIG_DWARF) += dwarf-regs.o | 8 | libperf-$(CONFIG_DWARF) += dwarf-regs.o |
8 | libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o | 9 | libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o |
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c new file mode 100644 index 000000000000..37f92aa39a5d --- /dev/null +++ b/tools/perf/arch/x86/util/group.c | |||
@@ -0,0 +1,27 @@ | |||
1 | #include <stdio.h> | ||
2 | #include "api/fs/fs.h" | ||
3 | #include "util/group.h" | ||
4 | |||
5 | /* | ||
6 | * Check whether we can use a group for top down. | ||
7 | * Without a group may get bad results due to multiplexing. | ||
8 | */ | ||
9 | bool arch_topdown_check_group(bool *warn) | ||
10 | { | ||
11 | int n; | ||
12 | |||
13 | if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) | ||
14 | return false; | ||
15 | if (n > 0) { | ||
16 | *warn = true; | ||
17 | return false; | ||
18 | } | ||
19 | return true; | ||
20 | } | ||
21 | |||
22 | void arch_topdown_group_warn(void) | ||
23 | { | ||
24 | fprintf(stderr, | ||
25 | "nmi_watchdog enabled with topdown. May give wrong results.\n" | ||
26 | "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); | ||
27 | } | ||
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e3ce2f34d3ad..46011235af5d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c | |||
@@ -339,7 +339,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr) | |||
339 | */ | 339 | */ |
340 | static int perf_session__check_output_opt(struct perf_session *session) | 340 | static int perf_session__check_output_opt(struct perf_session *session) |
341 | { | 341 | { |
342 | int j; | 342 | unsigned int j; |
343 | struct perf_evsel *evsel; | 343 | struct perf_evsel *evsel; |
344 | 344 | ||
345 | for (j = 0; j < PERF_TYPE_MAX; ++j) { | 345 | for (j = 0; j < PERF_TYPE_MAX; ++j) { |
@@ -388,17 +388,20 @@ static int perf_session__check_output_opt(struct perf_session *session) | |||
388 | struct perf_event_attr *attr; | 388 | struct perf_event_attr *attr; |
389 | 389 | ||
390 | j = PERF_TYPE_TRACEPOINT; | 390 | j = PERF_TYPE_TRACEPOINT; |
391 | evsel = perf_session__find_first_evtype(session, j); | ||
392 | if (evsel == NULL) | ||
393 | goto out; | ||
394 | 391 | ||
395 | attr = &evsel->attr; | 392 | evlist__for_each(session->evlist, evsel) { |
393 | if (evsel->attr.type != j) | ||
394 | continue; | ||
395 | |||
396 | attr = &evsel->attr; | ||
396 | 397 | ||
397 | if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { | 398 | if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { |
398 | output[j].fields |= PERF_OUTPUT_IP; | 399 | output[j].fields |= PERF_OUTPUT_IP; |
399 | output[j].fields |= PERF_OUTPUT_SYM; | 400 | output[j].fields |= PERF_OUTPUT_SYM; |
400 | output[j].fields |= PERF_OUTPUT_DSO; | 401 | output[j].fields |= PERF_OUTPUT_DSO; |
401 | set_print_ip_opts(attr); | 402 | set_print_ip_opts(attr); |
403 | goto out; | ||
404 | } | ||
402 | } | 405 | } |
403 | } | 406 | } |
404 | 407 | ||
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ee7ada78d86f..dff63733dfb7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -59,10 +59,13 @@ | |||
59 | #include "util/thread.h" | 59 | #include "util/thread.h" |
60 | #include "util/thread_map.h" | 60 | #include "util/thread_map.h" |
61 | #include "util/counts.h" | 61 | #include "util/counts.h" |
62 | #include "util/group.h" | ||
62 | #include "util/session.h" | 63 | #include "util/session.h" |
63 | #include "util/tool.h" | 64 | #include "util/tool.h" |
65 | #include "util/group.h" | ||
64 | #include "asm/bug.h" | 66 | #include "asm/bug.h" |
65 | 67 | ||
68 | #include <api/fs/fs.h> | ||
66 | #include <stdlib.h> | 69 | #include <stdlib.h> |
67 | #include <sys/prctl.h> | 70 | #include <sys/prctl.h> |
68 | #include <locale.h> | 71 | #include <locale.h> |
@@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = { | |||
98 | "}" | 101 | "}" |
99 | }; | 102 | }; |
100 | 103 | ||
104 | static const char * topdown_attrs[] = { | ||
105 | "topdown-total-slots", | ||
106 | "topdown-slots-retired", | ||
107 | "topdown-recovery-bubbles", | ||
108 | "topdown-fetch-bubbles", | ||
109 | "topdown-slots-issued", | ||
110 | NULL, | ||
111 | }; | ||
112 | |||
101 | static struct perf_evlist *evsel_list; | 113 | static struct perf_evlist *evsel_list; |
102 | 114 | ||
103 | static struct target target = { | 115 | static struct target target = { |
@@ -112,6 +124,7 @@ static volatile pid_t child_pid = -1; | |||
112 | static bool null_run = false; | 124 | static bool null_run = false; |
113 | static int detailed_run = 0; | 125 | static int detailed_run = 0; |
114 | static bool transaction_run; | 126 | static bool transaction_run; |
127 | static bool topdown_run = false; | ||
115 | static bool big_num = true; | 128 | static bool big_num = true; |
116 | static int big_num_opt = -1; | 129 | static int big_num_opt = -1; |
117 | static const char *csv_sep = NULL; | 130 | static const char *csv_sep = NULL; |
@@ -124,6 +137,7 @@ static unsigned int initial_delay = 0; | |||
124 | static unsigned int unit_width = 4; /* strlen("unit") */ | 137 | static unsigned int unit_width = 4; /* strlen("unit") */ |
125 | static bool forever = false; | 138 | static bool forever = false; |
126 | static bool metric_only = false; | 139 | static bool metric_only = false; |
140 | static bool force_metric_only = false; | ||
127 | static struct timespec ref_time; | 141 | static struct timespec ref_time; |
128 | static struct cpu_map *aggr_map; | 142 | static struct cpu_map *aggr_map; |
129 | static aggr_get_id_t aggr_get_id; | 143 | static aggr_get_id_t aggr_get_id; |
@@ -1302,7 +1316,15 @@ static int aggr_header_lens[] = { | |||
1302 | [AGGR_GLOBAL] = 0, | 1316 | [AGGR_GLOBAL] = 0, |
1303 | }; | 1317 | }; |
1304 | 1318 | ||
1305 | static void print_metric_headers(char *prefix) | 1319 | static const char *aggr_header_csv[] = { |
1320 | [AGGR_CORE] = "core,cpus,", | ||
1321 | [AGGR_SOCKET] = "socket,cpus", | ||
1322 | [AGGR_NONE] = "cpu,", | ||
1323 | [AGGR_THREAD] = "comm-pid,", | ||
1324 | [AGGR_GLOBAL] = "" | ||
1325 | }; | ||
1326 | |||
1327 | static void print_metric_headers(const char *prefix, bool no_indent) | ||
1306 | { | 1328 | { |
1307 | struct perf_stat_output_ctx out; | 1329 | struct perf_stat_output_ctx out; |
1308 | struct perf_evsel *counter; | 1330 | struct perf_evsel *counter; |
@@ -1313,9 +1335,15 @@ static void print_metric_headers(char *prefix) | |||
1313 | if (prefix) | 1335 | if (prefix) |
1314 | fprintf(stat_config.output, "%s", prefix); | 1336 | fprintf(stat_config.output, "%s", prefix); |
1315 | 1337 | ||
1316 | if (!csv_output) | 1338 | if (!csv_output && !no_indent) |
1317 | fprintf(stat_config.output, "%*s", | 1339 | fprintf(stat_config.output, "%*s", |
1318 | aggr_header_lens[stat_config.aggr_mode], ""); | 1340 | aggr_header_lens[stat_config.aggr_mode], ""); |
1341 | if (csv_output) { | ||
1342 | if (stat_config.interval) | ||
1343 | fputs("time,", stat_config.output); | ||
1344 | fputs(aggr_header_csv[stat_config.aggr_mode], | ||
1345 | stat_config.output); | ||
1346 | } | ||
1319 | 1347 | ||
1320 | /* Print metrics headers only */ | 1348 | /* Print metrics headers only */ |
1321 | evlist__for_each(evsel_list, counter) { | 1349 | evlist__for_each(evsel_list, counter) { |
@@ -1338,28 +1366,40 @@ static void print_interval(char *prefix, struct timespec *ts) | |||
1338 | 1366 | ||
1339 | sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); | 1367 | sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); |
1340 | 1368 | ||
1341 | if (num_print_interval == 0 && !csv_output && !metric_only) { | 1369 | if (num_print_interval == 0 && !csv_output) { |
1342 | switch (stat_config.aggr_mode) { | 1370 | switch (stat_config.aggr_mode) { |
1343 | case AGGR_SOCKET: | 1371 | case AGGR_SOCKET: |
1344 | fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); | 1372 | fprintf(output, "# time socket cpus"); |
1373 | if (!metric_only) | ||
1374 | fprintf(output, " counts %*s events\n", unit_width, "unit"); | ||
1345 | break; | 1375 | break; |
1346 | case AGGR_CORE: | 1376 | case AGGR_CORE: |
1347 | fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); | 1377 | fprintf(output, "# time core cpus"); |
1378 | if (!metric_only) | ||
1379 | fprintf(output, " counts %*s events\n", unit_width, "unit"); | ||
1348 | break; | 1380 | break; |
1349 | case AGGR_NONE: | 1381 | case AGGR_NONE: |
1350 | fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); | 1382 | fprintf(output, "# time CPU"); |
1383 | if (!metric_only) | ||
1384 | fprintf(output, " counts %*s events\n", unit_width, "unit"); | ||
1351 | break; | 1385 | break; |
1352 | case AGGR_THREAD: | 1386 | case AGGR_THREAD: |
1353 | fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); | 1387 | fprintf(output, "# time comm-pid"); |
1388 | if (!metric_only) | ||
1389 | fprintf(output, " counts %*s events\n", unit_width, "unit"); | ||
1354 | break; | 1390 | break; |
1355 | case AGGR_GLOBAL: | 1391 | case AGGR_GLOBAL: |
1356 | default: | 1392 | default: |
1357 | fprintf(output, "# time counts %*s events\n", unit_width, "unit"); | 1393 | fprintf(output, "# time"); |
1394 | if (!metric_only) | ||
1395 | fprintf(output, " counts %*s events\n", unit_width, "unit"); | ||
1358 | case AGGR_UNSET: | 1396 | case AGGR_UNSET: |
1359 | break; | 1397 | break; |
1360 | } | 1398 | } |
1361 | } | 1399 | } |
1362 | 1400 | ||
1401 | if (num_print_interval == 0 && metric_only) | ||
1402 | print_metric_headers(" ", true); | ||
1363 | if (++num_print_interval == 25) | 1403 | if (++num_print_interval == 25) |
1364 | num_print_interval = 0; | 1404 | num_print_interval = 0; |
1365 | } | 1405 | } |
@@ -1428,8 +1468,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) | |||
1428 | if (metric_only) { | 1468 | if (metric_only) { |
1429 | static int num_print_iv; | 1469 | static int num_print_iv; |
1430 | 1470 | ||
1431 | if (num_print_iv == 0) | 1471 | if (num_print_iv == 0 && !interval) |
1432 | print_metric_headers(prefix); | 1472 | print_metric_headers(prefix, false); |
1433 | if (num_print_iv++ == 25) | 1473 | if (num_print_iv++ == 25) |
1434 | num_print_iv = 0; | 1474 | num_print_iv = 0; |
1435 | if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) | 1475 | if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) |
@@ -1520,6 +1560,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, | |||
1520 | return 0; | 1560 | return 0; |
1521 | } | 1561 | } |
1522 | 1562 | ||
1563 | static int enable_metric_only(const struct option *opt __maybe_unused, | ||
1564 | const char *s __maybe_unused, int unset) | ||
1565 | { | ||
1566 | force_metric_only = true; | ||
1567 | metric_only = !unset; | ||
1568 | return 0; | ||
1569 | } | ||
1570 | |||
1523 | static const struct option stat_options[] = { | 1571 | static const struct option stat_options[] = { |
1524 | OPT_BOOLEAN('T', "transaction", &transaction_run, | 1572 | OPT_BOOLEAN('T', "transaction", &transaction_run, |
1525 | "hardware transaction statistics"), | 1573 | "hardware transaction statistics"), |
@@ -1578,8 +1626,10 @@ static const struct option stat_options[] = { | |||
1578 | "aggregate counts per thread", AGGR_THREAD), | 1626 | "aggregate counts per thread", AGGR_THREAD), |
1579 | OPT_UINTEGER('D', "delay", &initial_delay, | 1627 | OPT_UINTEGER('D', "delay", &initial_delay, |
1580 | "ms to wait before starting measurement after program start"), | 1628 | "ms to wait before starting measurement after program start"), |
1581 | OPT_BOOLEAN(0, "metric-only", &metric_only, | 1629 | OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, |
1582 | "Only print computed metrics. No raw values"), | 1630 | "Only print computed metrics. No raw values", enable_metric_only), |
1631 | OPT_BOOLEAN(0, "topdown", &topdown_run, | ||
1632 | "measure topdown level 1 statistics"), | ||
1583 | OPT_END() | 1633 | OPT_END() |
1584 | }; | 1634 | }; |
1585 | 1635 | ||
@@ -1772,12 +1822,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) | |||
1772 | return 0; | 1822 | return 0; |
1773 | } | 1823 | } |
1774 | 1824 | ||
1825 | static int topdown_filter_events(const char **attr, char **str, bool use_group) | ||
1826 | { | ||
1827 | int off = 0; | ||
1828 | int i; | ||
1829 | int len = 0; | ||
1830 | char *s; | ||
1831 | |||
1832 | for (i = 0; attr[i]; i++) { | ||
1833 | if (pmu_have_event("cpu", attr[i])) { | ||
1834 | len += strlen(attr[i]) + 1; | ||
1835 | attr[i - off] = attr[i]; | ||
1836 | } else | ||
1837 | off++; | ||
1838 | } | ||
1839 | attr[i - off] = NULL; | ||
1840 | |||
1841 | *str = malloc(len + 1 + 2); | ||
1842 | if (!*str) | ||
1843 | return -1; | ||
1844 | s = *str; | ||
1845 | if (i - off == 0) { | ||
1846 | *s = 0; | ||
1847 | return 0; | ||
1848 | } | ||
1849 | if (use_group) | ||
1850 | *s++ = '{'; | ||
1851 | for (i = 0; attr[i]; i++) { | ||
1852 | strcpy(s, attr[i]); | ||
1853 | s += strlen(s); | ||
1854 | *s++ = ','; | ||
1855 | } | ||
1856 | if (use_group) { | ||
1857 | s[-1] = '}'; | ||
1858 | *s = 0; | ||
1859 | } else | ||
1860 | s[-1] = 0; | ||
1861 | return 0; | ||
1862 | } | ||
1863 | |||
1864 | __weak bool arch_topdown_check_group(bool *warn) | ||
1865 | { | ||
1866 | *warn = false; | ||
1867 | return false; | ||
1868 | } | ||
1869 | |||
1870 | __weak void arch_topdown_group_warn(void) | ||
1871 | { | ||
1872 | } | ||
1873 | |||
1775 | /* | 1874 | /* |
1776 | * Add default attributes, if there were no attributes specified or | 1875 | * Add default attributes, if there were no attributes specified or |
1777 | * if -d/--detailed, -d -d or -d -d -d is used: | 1876 | * if -d/--detailed, -d -d or -d -d -d is used: |
1778 | */ | 1877 | */ |
1779 | static int add_default_attributes(void) | 1878 | static int add_default_attributes(void) |
1780 | { | 1879 | { |
1880 | int err; | ||
1781 | struct perf_event_attr default_attrs0[] = { | 1881 | struct perf_event_attr default_attrs0[] = { |
1782 | 1882 | ||
1783 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | 1883 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
@@ -1896,7 +1996,6 @@ static int add_default_attributes(void) | |||
1896 | return 0; | 1996 | return 0; |
1897 | 1997 | ||
1898 | if (transaction_run) { | 1998 | if (transaction_run) { |
1899 | int err; | ||
1900 | if (pmu_have_event("cpu", "cycles-ct") && | 1999 | if (pmu_have_event("cpu", "cycles-ct") && |
1901 | pmu_have_event("cpu", "el-start")) | 2000 | pmu_have_event("cpu", "el-start")) |
1902 | err = parse_events(evsel_list, transaction_attrs, NULL); | 2001 | err = parse_events(evsel_list, transaction_attrs, NULL); |
@@ -1909,6 +2008,46 @@ static int add_default_attributes(void) | |||
1909 | return 0; | 2008 | return 0; |
1910 | } | 2009 | } |
1911 | 2010 | ||
2011 | if (topdown_run) { | ||
2012 | char *str = NULL; | ||
2013 | bool warn = false; | ||
2014 | |||
2015 | if (stat_config.aggr_mode != AGGR_GLOBAL && | ||
2016 | stat_config.aggr_mode != AGGR_CORE) { | ||
2017 | pr_err("top down event configuration requires --per-core mode\n"); | ||
2018 | return -1; | ||
2019 | } | ||
2020 | stat_config.aggr_mode = AGGR_CORE; | ||
2021 | if (nr_cgroups || !target__has_cpu(&target)) { | ||
2022 | pr_err("top down event configuration requires system-wide mode (-a)\n"); | ||
2023 | return -1; | ||
2024 | } | ||
2025 | |||
2026 | if (!force_metric_only) | ||
2027 | metric_only = true; | ||
2028 | if (topdown_filter_events(topdown_attrs, &str, | ||
2029 | arch_topdown_check_group(&warn)) < 0) { | ||
2030 | pr_err("Out of memory\n"); | ||
2031 | return -1; | ||
2032 | } | ||
2033 | if (topdown_attrs[0] && str) { | ||
2034 | if (warn) | ||
2035 | arch_topdown_group_warn(); | ||
2036 | err = parse_events(evsel_list, str, NULL); | ||
2037 | if (err) { | ||
2038 | fprintf(stderr, | ||
2039 | "Cannot set up top down events %s: %d\n", | ||
2040 | str, err); | ||
2041 | free(str); | ||
2042 | return -1; | ||
2043 | } | ||
2044 | } else { | ||
2045 | fprintf(stderr, "System does not support topdown\n"); | ||
2046 | return -1; | ||
2047 | } | ||
2048 | free(str); | ||
2049 | } | ||
2050 | |||
1912 | if (!evsel_list->nr_entries) { | 2051 | if (!evsel_list->nr_entries) { |
1913 | if (target__has_cpu(&target)) | 2052 | if (target__has_cpu(&target)) |
1914 | default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; | 2053 | default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; |
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 7865f68dc0d8..b2a2c74136a5 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c | |||
@@ -1783,8 +1783,8 @@ static int test_pmu_events(void) | |||
1783 | struct evlist_test e; | 1783 | struct evlist_test e; |
1784 | char name[MAX_NAME]; | 1784 | char name[MAX_NAME]; |
1785 | 1785 | ||
1786 | if (!strcmp(ent->d_name, ".") || | 1786 | /* Names containing . are special and cannot be used directly */ |
1787 | !strcmp(ent->d_name, "..")) | 1787 | if (strchr(ent->d_name, '.')) |
1788 | continue; | 1788 | continue; |
1789 | 1789 | ||
1790 | snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name); | 1790 | snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name); |
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index dad7d8272168..c73f1c4d1ca9 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c | |||
@@ -275,7 +275,8 @@ static int perf_parse_file(config_fn_t fn, void *data) | |||
275 | break; | 275 | break; |
276 | } | 276 | } |
277 | } | 277 | } |
278 | die("bad config file line %d in %s", config_linenr, config_file_name); | 278 | pr_err("bad config file line %d in %s\n", config_linenr, config_file_name); |
279 | return -1; | ||
279 | } | 280 | } |
280 | 281 | ||
281 | static int parse_unit_factor(const char *end, unsigned long *val) | 282 | static int parse_unit_factor(const char *end, unsigned long *val) |
@@ -479,16 +480,15 @@ static int perf_config_global(void) | |||
479 | 480 | ||
480 | int perf_config(config_fn_t fn, void *data) | 481 | int perf_config(config_fn_t fn, void *data) |
481 | { | 482 | { |
482 | int ret = 0, found = 0; | 483 | int ret = -1; |
483 | const char *home = NULL; | 484 | const char *home = NULL; |
484 | 485 | ||
485 | /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ | 486 | /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ |
486 | if (config_exclusive_filename) | 487 | if (config_exclusive_filename) |
487 | return perf_config_from_file(fn, config_exclusive_filename, data); | 488 | return perf_config_from_file(fn, config_exclusive_filename, data); |
488 | if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { | 489 | if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { |
489 | ret += perf_config_from_file(fn, perf_etc_perfconfig(), | 490 | if (perf_config_from_file(fn, perf_etc_perfconfig(), data) < 0) |
490 | data); | 491 | goto out; |
491 | found += 1; | ||
492 | } | 492 | } |
493 | 493 | ||
494 | home = getenv("HOME"); | 494 | home = getenv("HOME"); |
@@ -514,14 +514,12 @@ int perf_config(config_fn_t fn, void *data) | |||
514 | if (!st.st_size) | 514 | if (!st.st_size) |
515 | goto out_free; | 515 | goto out_free; |
516 | 516 | ||
517 | ret += perf_config_from_file(fn, user_config, data); | 517 | ret = perf_config_from_file(fn, user_config, data); |
518 | found += 1; | 518 | |
519 | out_free: | 519 | out_free: |
520 | free(user_config); | 520 | free(user_config); |
521 | } | 521 | } |
522 | out: | 522 | out: |
523 | if (found == 0) | ||
524 | return -1; | ||
525 | return ret; | 523 | return ret; |
526 | } | 524 | } |
527 | 525 | ||
@@ -609,8 +607,12 @@ static int collect_config(const char *var, const char *value, | |||
609 | struct perf_config_section *section = NULL; | 607 | struct perf_config_section *section = NULL; |
610 | struct perf_config_item *item = NULL; | 608 | struct perf_config_item *item = NULL; |
611 | struct perf_config_set *set = perf_config_set; | 609 | struct perf_config_set *set = perf_config_set; |
612 | struct list_head *sections = &set->sections; | 610 | struct list_head *sections; |
611 | |||
612 | if (set == NULL) | ||
613 | return -1; | ||
613 | 614 | ||
615 | sections = &set->sections; | ||
614 | key = ptr = strdup(var); | 616 | key = ptr = strdup(var); |
615 | if (!key) { | 617 | if (!key) { |
616 | pr_debug("%s: strdup failed\n", __func__); | 618 | pr_debug("%s: strdup failed\n", __func__); |
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e0f30946ed1a..1b918aa075d6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c | |||
@@ -946,9 +946,12 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) | |||
946 | if (cpu_map__empty(evlist->cpus)) | 946 | if (cpu_map__empty(evlist->cpus)) |
947 | evlist->nr_mmaps = thread_map__nr(evlist->threads); | 947 | evlist->nr_mmaps = thread_map__nr(evlist->threads); |
948 | evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); | 948 | evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); |
949 | if (!evlist->mmap) | ||
950 | return -ENOMEM; | ||
951 | |||
949 | for (i = 0; i < evlist->nr_mmaps; i++) | 952 | for (i = 0; i < evlist->nr_mmaps; i++) |
950 | evlist->mmap[i].fd = -1; | 953 | evlist->mmap[i].fd = -1; |
951 | return evlist->mmap != NULL ? 0 : -ENOMEM; | 954 | return 0; |
952 | } | 955 | } |
953 | 956 | ||
954 | struct mmap_params { | 957 | struct mmap_params { |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 18e18f1d435e..9b2e3e624efe 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
@@ -2251,17 +2251,11 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, | |||
2251 | return sample->raw_data + offset; | 2251 | return sample->raw_data + offset; |
2252 | } | 2252 | } |
2253 | 2253 | ||
2254 | u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, | 2254 | u64 format_field__intval(struct format_field *field, struct perf_sample *sample, |
2255 | const char *name) | 2255 | bool needs_swap) |
2256 | { | 2256 | { |
2257 | struct format_field *field = perf_evsel__field(evsel, name); | ||
2258 | void *ptr; | ||
2259 | u64 value; | 2257 | u64 value; |
2260 | 2258 | void *ptr = sample->raw_data + field->offset; | |
2261 | if (!field) | ||
2262 | return 0; | ||
2263 | |||
2264 | ptr = sample->raw_data + field->offset; | ||
2265 | 2259 | ||
2266 | switch (field->size) { | 2260 | switch (field->size) { |
2267 | case 1: | 2261 | case 1: |
@@ -2279,7 +2273,7 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, | |||
2279 | return 0; | 2273 | return 0; |
2280 | } | 2274 | } |
2281 | 2275 | ||
2282 | if (!evsel->needs_swap) | 2276 | if (!needs_swap) |
2283 | return value; | 2277 | return value; |
2284 | 2278 | ||
2285 | switch (field->size) { | 2279 | switch (field->size) { |
@@ -2296,6 +2290,17 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, | |||
2296 | return 0; | 2290 | return 0; |
2297 | } | 2291 | } |
2298 | 2292 | ||
2293 | u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, | ||
2294 | const char *name) | ||
2295 | { | ||
2296 | struct format_field *field = perf_evsel__field(evsel, name); | ||
2297 | |||
2298 | if (!field) | ||
2299 | return 0; | ||
2300 | |||
2301 | return field ? format_field__intval(field, sample, evsel->needs_swap) : 0; | ||
2302 | } | ||
2303 | |||
2299 | bool perf_evsel__fallback(struct perf_evsel *evsel, int err, | 2304 | bool perf_evsel__fallback(struct perf_evsel *evsel, int err, |
2300 | char *msg, size_t msgsize) | 2305 | char *msg, size_t msgsize) |
2301 | { | 2306 | { |
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 028412b32d5a..828ddd1c8947 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h | |||
@@ -261,6 +261,8 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel, | |||
261 | 261 | ||
262 | struct format_field; | 262 | struct format_field; |
263 | 263 | ||
264 | u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap); | ||
265 | |||
264 | struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); | 266 | struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); |
265 | 267 | ||
266 | #define perf_evsel__match(evsel, t, c) \ | 268 | #define perf_evsel__match(evsel, t, c) \ |
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h new file mode 100644 index 000000000000..116debe7a995 --- /dev/null +++ b/tools/perf/util/group.h | |||
@@ -0,0 +1,7 @@ | |||
1 | #ifndef GROUP_H | ||
2 | #define GROUP_H 1 | ||
3 | |||
4 | bool arch_topdown_check_group(bool *warn); | ||
5 | void arch_topdown_group_warn(void); | ||
6 | |||
7 | #endif | ||
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 01af1ee90a27..3c15b33b2e84 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l | |||
@@ -260,6 +260,7 @@ cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); } | |||
260 | cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } | 260 | cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } |
261 | mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } | 261 | mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } |
262 | mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } | 262 | mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } |
263 | topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } | ||
263 | 264 | ||
264 | L1-dcache|l1-d|l1d|L1-data | | 265 | L1-dcache|l1-d|l1d|L1-data | |
265 | L1-icache|l1-i|l1i|L1-instruction | | 266 | L1-icache|l1-i|l1i|L1-instruction | |
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index aa9efe08762b..8a2bbd2a4d82 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c | |||
@@ -36,6 +36,11 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | |||
36 | static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; | 36 | static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; |
37 | static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; | 37 | static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; |
38 | static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; | 38 | static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; |
39 | static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; | ||
40 | static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; | ||
41 | static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; | ||
42 | static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; | ||
43 | static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; | ||
39 | static bool have_frontend_stalled; | 44 | static bool have_frontend_stalled; |
40 | 45 | ||
41 | struct stats walltime_nsecs_stats; | 46 | struct stats walltime_nsecs_stats; |
@@ -82,6 +87,11 @@ void perf_stat__reset_shadow_stats(void) | |||
82 | sizeof(runtime_transaction_stats)); | 87 | sizeof(runtime_transaction_stats)); |
83 | memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); | 88 | memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); |
84 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); | 89 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); |
90 | memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); | ||
91 | memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); | ||
92 | memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); | ||
93 | memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); | ||
94 | memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); | ||
85 | } | 95 | } |
86 | 96 | ||
87 | /* | 97 | /* |
@@ -105,6 +115,16 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | |||
105 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | 115 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); |
106 | else if (perf_stat_evsel__is(counter, ELISION_START)) | 116 | else if (perf_stat_evsel__is(counter, ELISION_START)) |
107 | update_stats(&runtime_elision_stats[ctx][cpu], count[0]); | 117 | update_stats(&runtime_elision_stats[ctx][cpu], count[0]); |
118 | else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) | ||
119 | update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]); | ||
120 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) | ||
121 | update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]); | ||
122 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) | ||
123 | update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]); | ||
124 | else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) | ||
125 | update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]); | ||
126 | else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) | ||
127 | update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]); | ||
108 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | 128 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) |
109 | update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); | 129 | update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); |
110 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | 130 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) |
@@ -302,6 +322,107 @@ static void print_ll_cache_misses(int cpu, | |||
302 | out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); | 322 | out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); |
303 | } | 323 | } |
304 | 324 | ||
325 | /* | ||
326 | * High level "TopDown" CPU core pipe line bottleneck break down. | ||
327 | * | ||
328 | * Basic concept following | ||
329 | * Yasin, A Top Down Method for Performance analysis and Counter architecture | ||
330 | * ISPASS14 | ||
331 | * | ||
332 | * The CPU pipeline is divided into 4 areas that can be bottlenecks: | ||
333 | * | ||
334 | * Frontend -> Backend -> Retiring | ||
335 | * BadSpeculation in addition means out of order execution that is thrown away | ||
336 | * (for example branch mispredictions) | ||
337 | * Frontend is instruction decoding. | ||
338 | * Backend is execution, like computation and accessing data in memory | ||
339 | * Retiring is good execution that is not directly bottlenecked | ||
340 | * | ||
341 | * The formulas are computed in slots. | ||
342 | * A slot is an entry in the pipeline each for the pipeline width | ||
343 | * (for example a 4-wide pipeline has 4 slots for each cycle) | ||
344 | * | ||
345 | * Formulas: | ||
346 | * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / | ||
347 | * TotalSlots | ||
348 | * Retiring = SlotsRetired / TotalSlots | ||
349 | * FrontendBound = FetchBubbles / TotalSlots | ||
350 | * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound | ||
351 | * | ||
352 | * The kernel provides the mapping to the low level CPU events and any scaling | ||
353 | * needed for the CPU pipeline width, for example: | ||
354 | * | ||
355 | * TotalSlots = Cycles * 4 | ||
356 | * | ||
357 | * The scaling factor is communicated in the sysfs unit. | ||
358 | * | ||
359 | * In some cases the CPU may not be able to measure all the formulas due to | ||
360 | * missing events. In this case multiple formulas are combined, as possible. | ||
361 | * | ||
362 | * Full TopDown supports more levels to sub-divide each area: for example | ||
363 | * BackendBound into computing bound and memory bound. For now we only | ||
364 | * support Level 1 TopDown. | ||
365 | */ | ||
366 | |||
367 | static double sanitize_val(double x) | ||
368 | { | ||
369 | if (x < 0 && x >= -0.02) | ||
370 | return 0.0; | ||
371 | return x; | ||
372 | } | ||
373 | |||
374 | static double td_total_slots(int ctx, int cpu) | ||
375 | { | ||
376 | return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); | ||
377 | } | ||
378 | |||
379 | static double td_bad_spec(int ctx, int cpu) | ||
380 | { | ||
381 | double bad_spec = 0; | ||
382 | double total_slots; | ||
383 | double total; | ||
384 | |||
385 | total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - | ||
386 | avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + | ||
387 | avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); | ||
388 | total_slots = td_total_slots(ctx, cpu); | ||
389 | if (total_slots) | ||
390 | bad_spec = total / total_slots; | ||
391 | return sanitize_val(bad_spec); | ||
392 | } | ||
393 | |||
394 | static double td_retiring(int ctx, int cpu) | ||
395 | { | ||
396 | double retiring = 0; | ||
397 | double total_slots = td_total_slots(ctx, cpu); | ||
398 | double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); | ||
399 | |||
400 | if (total_slots) | ||
401 | retiring = ret_slots / total_slots; | ||
402 | return retiring; | ||
403 | } | ||
404 | |||
405 | static double td_fe_bound(int ctx, int cpu) | ||
406 | { | ||
407 | double fe_bound = 0; | ||
408 | double total_slots = td_total_slots(ctx, cpu); | ||
409 | double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); | ||
410 | |||
411 | if (total_slots) | ||
412 | fe_bound = fetch_bub / total_slots; | ||
413 | return fe_bound; | ||
414 | } | ||
415 | |||
416 | static double td_be_bound(int ctx, int cpu) | ||
417 | { | ||
418 | double sum = (td_fe_bound(ctx, cpu) + | ||
419 | td_bad_spec(ctx, cpu) + | ||
420 | td_retiring(ctx, cpu)); | ||
421 | if (sum == 0) | ||
422 | return 0; | ||
423 | return sanitize_val(1.0 - sum); | ||
424 | } | ||
425 | |||
305 | void perf_stat__print_shadow_stats(struct perf_evsel *evsel, | 426 | void perf_stat__print_shadow_stats(struct perf_evsel *evsel, |
306 | double avg, int cpu, | 427 | double avg, int cpu, |
307 | struct perf_stat_output_ctx *out) | 428 | struct perf_stat_output_ctx *out) |
@@ -309,6 +430,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, | |||
309 | void *ctxp = out->ctx; | 430 | void *ctxp = out->ctx; |
310 | print_metric_t print_metric = out->print_metric; | 431 | print_metric_t print_metric = out->print_metric; |
311 | double total, ratio = 0.0, total2; | 432 | double total, ratio = 0.0, total2; |
433 | const char *color = NULL; | ||
312 | int ctx = evsel_context(evsel); | 434 | int ctx = evsel_context(evsel); |
313 | 435 | ||
314 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | 436 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { |
@@ -452,6 +574,46 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, | |||
452 | avg / ratio); | 574 | avg / ratio); |
453 | else | 575 | else |
454 | print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); | 576 | print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); |
577 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { | ||
578 | double fe_bound = td_fe_bound(ctx, cpu); | ||
579 | |||
580 | if (fe_bound > 0.2) | ||
581 | color = PERF_COLOR_RED; | ||
582 | print_metric(ctxp, color, "%8.1f%%", "frontend bound", | ||
583 | fe_bound * 100.); | ||
584 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { | ||
585 | double retiring = td_retiring(ctx, cpu); | ||
586 | |||
587 | if (retiring > 0.7) | ||
588 | color = PERF_COLOR_GREEN; | ||
589 | print_metric(ctxp, color, "%8.1f%%", "retiring", | ||
590 | retiring * 100.); | ||
591 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { | ||
592 | double bad_spec = td_bad_spec(ctx, cpu); | ||
593 | |||
594 | if (bad_spec > 0.1) | ||
595 | color = PERF_COLOR_RED; | ||
596 | print_metric(ctxp, color, "%8.1f%%", "bad speculation", | ||
597 | bad_spec * 100.); | ||
598 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { | ||
599 | double be_bound = td_be_bound(ctx, cpu); | ||
600 | const char *name = "backend bound"; | ||
601 | static int have_recovery_bubbles = -1; | ||
602 | |||
603 | /* In case the CPU does not support topdown-recovery-bubbles */ | ||
604 | if (have_recovery_bubbles < 0) | ||
605 | have_recovery_bubbles = pmu_have_event("cpu", | ||
606 | "topdown-recovery-bubbles"); | ||
607 | if (!have_recovery_bubbles) | ||
608 | name = "backend bound/bad spec"; | ||
609 | |||
610 | if (be_bound > 0.2) | ||
611 | color = PERF_COLOR_RED; | ||
612 | if (td_total_slots(ctx, cpu) > 0) | ||
613 | print_metric(ctxp, color, "%8.1f%%", name, | ||
614 | be_bound * 100.); | ||
615 | else | ||
616 | print_metric(ctxp, NULL, NULL, name, 0); | ||
455 | } else if (runtime_nsecs_stats[cpu].n != 0) { | 617 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
456 | char unit = 'M'; | 618 | char unit = 'M'; |
457 | char unit_buf[10]; | 619 | char unit_buf[10]; |
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index ffa1d0653861..c1ba255f2abe 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c | |||
@@ -79,6 +79,11 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { | |||
79 | ID(TRANSACTION_START, cpu/tx-start/), | 79 | ID(TRANSACTION_START, cpu/tx-start/), |
80 | ID(ELISION_START, cpu/el-start/), | 80 | ID(ELISION_START, cpu/el-start/), |
81 | ID(CYCLES_IN_TX_CP, cpu/cycles-ct/), | 81 | ID(CYCLES_IN_TX_CP, cpu/cycles-ct/), |
82 | ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots), | ||
83 | ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued), | ||
84 | ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), | ||
85 | ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), | ||
86 | ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), | ||
82 | }; | 87 | }; |
83 | #undef ID | 88 | #undef ID |
84 | 89 | ||
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0150e786ccc7..c29bb94c48a4 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h | |||
@@ -17,6 +17,11 @@ enum perf_stat_evsel_id { | |||
17 | PERF_STAT_EVSEL_ID__TRANSACTION_START, | 17 | PERF_STAT_EVSEL_ID__TRANSACTION_START, |
18 | PERF_STAT_EVSEL_ID__ELISION_START, | 18 | PERF_STAT_EVSEL_ID__ELISION_START, |
19 | PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP, | 19 | PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP, |
20 | PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS, | ||
21 | PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED, | ||
22 | PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED, | ||
23 | PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES, | ||
24 | PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES, | ||
20 | PERF_STAT_EVSEL_ID__MAX, | 25 | PERF_STAT_EVSEL_ID__MAX, |
21 | }; | 26 | }; |
22 | 27 | ||