aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2016-05-30 11:49:42 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2016-06-06 16:04:15 -0400
commit44b1e60ab576c343aa592a2a6c679297cc69740d (patch)
treeaf5a5f995cb7e0f6ce9a57f9163386d9be1321ed
parent17a2634bcb88e52bd637fdaa47d7ff0bddb0188f (diff)
perf stat: Basic support for TopDown in perf stat
Add basic plumbing for TopDown in perf stat TopDown is intended to replace the frontend cycles idle/ backend cycles idle metrics in standard perf stat output. These metrics are not reliable in many workloads, due to out of order effects. This implements a new --topdown mode in perf stat (similar to --transaction) that measures the pipeline bottlenecks using standardized formulas. The measurement can be all done with 5 counters (one fixed counter). The results are four metrics: FrontendBound, BackendBound, BadSpeculation, Retiring that describe the CPU pipeline behavior on a high level. The full top down methodology has many hierarchical metrics. This implementation only supports level 1 which can be collected without multiplexing. A full implementation of top down on top of perf is available in pmu-tools toplev. (http://github.com/andikleen/pmu-tools) The current version works on Intel Core CPUs starting with Sandy Bridge, and Atom CPUs starting with Silvermont. In principle the generic metrics should also be implementable on other out of order CPUs. TopDown level 1 uses a set of abstracted metrics which are generic to out of order CPU cores (although some CPUs may not implement all of them): topdown-total-slots Available slots in the pipeline topdown-slots-issued Slots issued into the pipeline topdown-slots-retired Slots successfully retired topdown-fetch-bubbles Pipeline gaps in the frontend topdown-recovery-bubbles Pipeline gaps during recovery from misspeculation These metrics then allow computing four useful metrics: FrontendBound, BackendBound, Retiring, BadSpeculation. Add a new --topdown option to enable events. When --topdown is specified, set up events for all topdown events supported by the kernel. Add topdown-* as a special case to the event parser, as is needed for all events containing -. The actual code to compute the metrics is in follow-on patches. v2: Use standard sysctl read function. 
v3: Move x86 specific code to arch/ v4: Enable --metric-only implicitly for topdown. v5: Add --single-thread option to not force per core mode v6: Fix output order of topdown metrics v7: Allow combining with -d v8: Remove --single-thread again v9: Rename functions, adding arch_ and topdown_. v10: Expand man page and describe TopDown better Paste intro into commit description. Print error when malloc fails. Signed-off-by: Andi Kleen <ak@linux.intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Link: http://lkml.kernel.org/r/1464119559-17203-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/Documentation/perf-stat.txt32
-rw-r--r--tools/perf/arch/x86/util/Build1
-rw-r--r--tools/perf/arch/x86/util/group.c27
-rw-r--r--tools/perf/builtin-stat.c119
-rw-r--r--tools/perf/util/group.h7
-rw-r--r--tools/perf/util/parse-events.l1
6 files changed, 184 insertions, 3 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04f23b404bbc..d96ccd4844df 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -204,6 +204,38 @@ Aggregate counts per physical processor for system-wide mode measurements.
204--no-aggr:: 204--no-aggr::
205Do not aggregate counts across all monitored CPUs. 205Do not aggregate counts across all monitored CPUs.
206 206
207--topdown::
208Print top down level 1 metrics if supported by the CPU. This allows one to
209determine bottlenecks in the CPU pipeline for CPU bound workloads,
210by breaking the cycles consumed down into frontend bound, backend bound,
211bad speculation and retiring.
212
213Frontend bound means that the CPU cannot fetch and decode instructions fast
214enough. Backend bound means that computation or memory access is the
215bottleneck. Bad Speculation means that the CPU wasted cycles due to branch
216mispredictions and similar issues. Retiring means that the CPU computed without
217an apparent bottleneck. The bottleneck is only the real bottleneck
218if the workload is actually bound by the CPU and not by something else.
219
220For best results it is usually a good idea to use it with interval
221mode like -I 1000, as the bottleneck of workloads can change often.
222
223The top down metrics are collected per core instead of per
224CPU thread. Per core mode is automatically enabled
225and -a (global monitoring) is needed, requiring root rights or
226perf.perf_event_paranoid=-1.
227
228Topdown uses the full Performance Monitoring Unit, and needs
229disabling of the NMI watchdog (as root):
230echo 0 > /proc/sys/kernel/nmi_watchdog
231for best results. Otherwise the bottlenecks may be inconsistent
232on workloads with changing phases.
233
234This enables --metric-only, unless overridden with --no-metric-only.
235
236To interpret the results it is usually needed to know on which
237CPUs the workload runs on. If needed the CPUs can be forced using
238taskset.
207 239
208EXAMPLES 240EXAMPLES
209-------- 241--------
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 465970370f3e..4cd8a16b1b7b 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -3,6 +3,7 @@ libperf-y += tsc.o
3libperf-y += pmu.o 3libperf-y += pmu.o
4libperf-y += kvm-stat.o 4libperf-y += kvm-stat.o
5libperf-y += perf_regs.o 5libperf-y += perf_regs.o
6libperf-y += group.o
6 7
7libperf-$(CONFIG_DWARF) += dwarf-regs.o 8libperf-$(CONFIG_DWARF) += dwarf-regs.o
8libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o 9libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c
new file mode 100644
index 000000000000..37f92aa39a5d
--- /dev/null
+++ b/tools/perf/arch/x86/util/group.c
@@ -0,0 +1,27 @@
1#include <stdio.h>
2#include "api/fs/fs.h"
3#include "util/group.h"
4
5/*
6 * Check whether we can use a group for top down.
7 * Without a group may get bad results due to multiplexing.
8 */
9bool arch_topdown_check_group(bool *warn)
10{
11 int n;
12
13 if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
14 return false;
15 if (n > 0) {
16 *warn = true;
17 return false;
18 }
19 return true;
20}
21
22void arch_topdown_group_warn(void)
23{
24 fprintf(stderr,
25 "nmi_watchdog enabled with topdown. May give wrong results.\n"
26 "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
27}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ee7ada78d86f..fd76bb0b18d1 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,10 +59,13 @@
59#include "util/thread.h" 59#include "util/thread.h"
60#include "util/thread_map.h" 60#include "util/thread_map.h"
61#include "util/counts.h" 61#include "util/counts.h"
62#include "util/group.h"
62#include "util/session.h" 63#include "util/session.h"
63#include "util/tool.h" 64#include "util/tool.h"
65#include "util/group.h"
64#include "asm/bug.h" 66#include "asm/bug.h"
65 67
68#include <api/fs/fs.h>
66#include <stdlib.h> 69#include <stdlib.h>
67#include <sys/prctl.h> 70#include <sys/prctl.h>
68#include <locale.h> 71#include <locale.h>
@@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = {
98 "}" 101 "}"
99}; 102};
100 103
104static const char * topdown_attrs[] = {
105 "topdown-total-slots",
106 "topdown-slots-retired",
107 "topdown-recovery-bubbles",
108 "topdown-fetch-bubbles",
109 "topdown-slots-issued",
110 NULL,
111};
112
101static struct perf_evlist *evsel_list; 113static struct perf_evlist *evsel_list;
102 114
103static struct target target = { 115static struct target target = {
@@ -112,6 +124,7 @@ static volatile pid_t child_pid = -1;
112static bool null_run = false; 124static bool null_run = false;
113static int detailed_run = 0; 125static int detailed_run = 0;
114static bool transaction_run; 126static bool transaction_run;
127static bool topdown_run = false;
115static bool big_num = true; 128static bool big_num = true;
116static int big_num_opt = -1; 129static int big_num_opt = -1;
117static const char *csv_sep = NULL; 130static const char *csv_sep = NULL;
@@ -124,6 +137,7 @@ static unsigned int initial_delay = 0;
124static unsigned int unit_width = 4; /* strlen("unit") */ 137static unsigned int unit_width = 4; /* strlen("unit") */
125static bool forever = false; 138static bool forever = false;
126static bool metric_only = false; 139static bool metric_only = false;
140static bool force_metric_only = false;
127static struct timespec ref_time; 141static struct timespec ref_time;
128static struct cpu_map *aggr_map; 142static struct cpu_map *aggr_map;
129static aggr_get_id_t aggr_get_id; 143static aggr_get_id_t aggr_get_id;
@@ -1520,6 +1534,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
1520 return 0; 1534 return 0;
1521} 1535}
1522 1536
1537static int enable_metric_only(const struct option *opt __maybe_unused,
1538 const char *s __maybe_unused, int unset)
1539{
1540 force_metric_only = true;
1541 metric_only = !unset;
1542 return 0;
1543}
1544
1523static const struct option stat_options[] = { 1545static const struct option stat_options[] = {
1524 OPT_BOOLEAN('T', "transaction", &transaction_run, 1546 OPT_BOOLEAN('T', "transaction", &transaction_run,
1525 "hardware transaction statistics"), 1547 "hardware transaction statistics"),
@@ -1578,8 +1600,10 @@ static const struct option stat_options[] = {
1578 "aggregate counts per thread", AGGR_THREAD), 1600 "aggregate counts per thread", AGGR_THREAD),
1579 OPT_UINTEGER('D', "delay", &initial_delay, 1601 OPT_UINTEGER('D', "delay", &initial_delay,
1580 "ms to wait before starting measurement after program start"), 1602 "ms to wait before starting measurement after program start"),
1581 OPT_BOOLEAN(0, "metric-only", &metric_only, 1603 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
1582 "Only print computed metrics. No raw values"), 1604 "Only print computed metrics. No raw values", enable_metric_only),
1605 OPT_BOOLEAN(0, "topdown", &topdown_run,
1606 "measure topdown level 1 statistics"),
1583 OPT_END() 1607 OPT_END()
1584}; 1608};
1585 1609
@@ -1772,12 +1796,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
1772 return 0; 1796 return 0;
1773} 1797}
1774 1798
1799static int topdown_filter_events(const char **attr, char **str, bool use_group)
1800{
1801 int off = 0;
1802 int i;
1803 int len = 0;
1804 char *s;
1805
1806 for (i = 0; attr[i]; i++) {
1807 if (pmu_have_event("cpu", attr[i])) {
1808 len += strlen(attr[i]) + 1;
1809 attr[i - off] = attr[i];
1810 } else
1811 off++;
1812 }
1813 attr[i - off] = NULL;
1814
1815 *str = malloc(len + 1 + 2);
1816 if (!*str)
1817 return -1;
1818 s = *str;
1819 if (i - off == 0) {
1820 *s = 0;
1821 return 0;
1822 }
1823 if (use_group)
1824 *s++ = '{';
1825 for (i = 0; attr[i]; i++) {
1826 strcpy(s, attr[i]);
1827 s += strlen(s);
1828 *s++ = ',';
1829 }
1830 if (use_group) {
1831 s[-1] = '}';
1832 *s = 0;
1833 } else
1834 s[-1] = 0;
1835 return 0;
1836}
1837
1838__weak bool arch_topdown_check_group(bool *warn)
1839{
1840 *warn = false;
1841 return false;
1842}
1843
1844__weak void arch_topdown_group_warn(void)
1845{
1846}
1847
1775/* 1848/*
1776 * Add default attributes, if there were no attributes specified or 1849 * Add default attributes, if there were no attributes specified or
1777 * if -d/--detailed, -d -d or -d -d -d is used: 1850 * if -d/--detailed, -d -d or -d -d -d is used:
1778 */ 1851 */
1779static int add_default_attributes(void) 1852static int add_default_attributes(void)
1780{ 1853{
1854 int err;
1781 struct perf_event_attr default_attrs0[] = { 1855 struct perf_event_attr default_attrs0[] = {
1782 1856
1783 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1857 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
@@ -1896,7 +1970,6 @@ static int add_default_attributes(void)
1896 return 0; 1970 return 0;
1897 1971
1898 if (transaction_run) { 1972 if (transaction_run) {
1899 int err;
1900 if (pmu_have_event("cpu", "cycles-ct") && 1973 if (pmu_have_event("cpu", "cycles-ct") &&
1901 pmu_have_event("cpu", "el-start")) 1974 pmu_have_event("cpu", "el-start"))
1902 err = parse_events(evsel_list, transaction_attrs, NULL); 1975 err = parse_events(evsel_list, transaction_attrs, NULL);
@@ -1909,6 +1982,46 @@ static int add_default_attributes(void)
1909 return 0; 1982 return 0;
1910 } 1983 }
1911 1984
1985 if (topdown_run) {
1986 char *str = NULL;
1987 bool warn = false;
1988
1989 if (stat_config.aggr_mode != AGGR_GLOBAL &&
1990 stat_config.aggr_mode != AGGR_CORE) {
1991 pr_err("top down event configuration requires --per-core mode\n");
1992 return -1;
1993 }
1994 stat_config.aggr_mode = AGGR_CORE;
1995 if (nr_cgroups || !target__has_cpu(&target)) {
1996 pr_err("top down event configuration requires system-wide mode (-a)\n");
1997 return -1;
1998 }
1999
2000 if (!force_metric_only)
2001 metric_only = true;
2002 if (topdown_filter_events(topdown_attrs, &str,
2003 arch_topdown_check_group(&warn)) < 0) {
2004 pr_err("Out of memory\n");
2005 return -1;
2006 }
2007 if (topdown_attrs[0] && str) {
2008 if (warn)
2009 arch_topdown_group_warn();
2010 err = parse_events(evsel_list, str, NULL);
2011 if (err) {
2012 fprintf(stderr,
2013 "Cannot set up top down events %s: %d\n",
2014 str, err);
2015 free(str);
2016 return -1;
2017 }
2018 } else {
2019 fprintf(stderr, "System does not support topdown\n");
2020 return -1;
2021 }
2022 free(str);
2023 }
2024
1912 if (!evsel_list->nr_entries) { 2025 if (!evsel_list->nr_entries) {
1913 if (target__has_cpu(&target)) 2026 if (target__has_cpu(&target))
1914 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2027 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h
new file mode 100644
index 000000000000..116debe7a995
--- /dev/null
+++ b/tools/perf/util/group.h
@@ -0,0 +1,7 @@
1#ifndef GROUP_H
2#define GROUP_H 1
3
4bool arch_topdown_check_group(bool *warn);
5void arch_topdown_group_warn(void);
6
7#endif
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 01af1ee90a27..3c15b33b2e84 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -260,6 +260,7 @@ cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
260cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } 260cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
261mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } 261mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
262mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } 262mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
263topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
263 264
264L1-dcache|l1-d|l1d|L1-data | 265L1-dcache|l1-d|l1d|L1-data |
265L1-icache|l1-i|l1i|L1-instruction | 266L1-icache|l1-i|l1i|L1-instruction |