summaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2016-05-30 11:49:42 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2016-06-06 16:04:15 -0400
commit44b1e60ab576c343aa592a2a6c679297cc69740d (patch)
treeaf5a5f995cb7e0f6ce9a57f9163386d9be1321ed /tools/perf/builtin-stat.c
parent17a2634bcb88e52bd637fdaa47d7ff0bddb0188f (diff)
perf stat: Basic support for TopDown in perf stat
Add basic plumbing for TopDown in perf stat. TopDown is intended to replace the frontend cycles idle / backend cycles idle metrics in standard perf stat output. These metrics are not reliable in many workloads, due to out-of-order effects. This implements a new --topdown mode in perf stat (similar to --transaction) that measures the pipeline bottlenecks using standardized formulas. The measurement can all be done with 5 counters (one fixed counter). The results are four metrics: FrontendBound, BackendBound, BadSpeculation, Retiring, that describe the CPU pipeline behavior on a high level. The full top down methodology has many hierarchical metrics. This implementation only supports level 1, which can be collected without multiplexing. A full implementation of top down on top of perf is available in pmu-tools toplev (http://github.com/andikleen/pmu-tools). The current version works on Intel Core CPUs starting with Sandy Bridge, and Atom CPUs starting with Silvermont. In principle the generic metrics should also be implementable on other out-of-order CPUs. TopDown level 1 uses a set of abstracted metrics which are generic to out-of-order CPU cores (although some CPUs may not implement all of them): topdown-total-slots: Available slots in the pipeline; topdown-slots-issued: Slots issued into the pipeline; topdown-slots-retired: Slots successfully retired; topdown-fetch-bubbles: Pipeline gaps in the frontend; topdown-recovery-bubbles: Pipeline gaps during recovery from misspeculation. These metrics then allow computing four useful metrics: FrontendBound, BackendBound, Retiring, BadSpeculation. Add a new --topdown option to enable events. When --topdown is specified, set up events for all topdown events supported by the kernel. Add topdown-* as a special case to the event parser, as is needed for all events containing -. The actual code to compute the metrics is in follow-on patches. v2: Use standard sysctl read function.
v3: Move x86 specific code to arch/ v4: Enable --metric-only implicitly for topdown. v5: Add --single-thread option to not force per core mode v6: Fix output order of topdown metrics v7: Allow combining with -d v8: Remove --single-thread again v9: Rename functions, adding arch_ and topdown_. v10: Expand man page and describe TopDown better. Paste intro into commit description. Print error when malloc fails. Signed-off-by: Andi Kleen <ak@linux.intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Link: http://lkml.kernel.org/r/1464119559-17203-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c119
1 files changed, 116 insertions, 3 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ee7ada78d86f..fd76bb0b18d1 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,10 +59,13 @@
59#include "util/thread.h" 59#include "util/thread.h"
60#include "util/thread_map.h" 60#include "util/thread_map.h"
61#include "util/counts.h" 61#include "util/counts.h"
62#include "util/group.h"
62#include "util/session.h" 63#include "util/session.h"
63#include "util/tool.h" 64#include "util/tool.h"
65#include "util/group.h"
64#include "asm/bug.h" 66#include "asm/bug.h"
65 67
68#include <api/fs/fs.h>
66#include <stdlib.h> 69#include <stdlib.h>
67#include <sys/prctl.h> 70#include <sys/prctl.h>
68#include <locale.h> 71#include <locale.h>
@@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = {
98 "}" 101 "}"
99}; 102};
100 103
/*
 * Event names requested by --topdown, as a NULL-terminated list.
 *
 * The pointer array itself is deliberately not const:
 * topdown_filter_events() compacts it in place, dropping the names the
 * "cpu" PMU does not provide.
 */
static const char *topdown_attrs[] = {
	"topdown-total-slots",
	"topdown-slots-retired",
	"topdown-recovery-bubbles",
	"topdown-fetch-bubbles",
	"topdown-slots-issued",
	NULL,	/* list terminator */
};
112
101static struct perf_evlist *evsel_list; 113static struct perf_evlist *evsel_list;
102 114
103static struct target target = { 115static struct target target = {
@@ -112,6 +124,7 @@ static volatile pid_t child_pid = -1;
112static bool null_run = false; 124static bool null_run = false;
113static int detailed_run = 0; 125static int detailed_run = 0;
114static bool transaction_run; 126static bool transaction_run;
127static bool topdown_run = false;
115static bool big_num = true; 128static bool big_num = true;
116static int big_num_opt = -1; 129static int big_num_opt = -1;
117static const char *csv_sep = NULL; 130static const char *csv_sep = NULL;
@@ -124,6 +137,7 @@ static unsigned int initial_delay = 0;
124static unsigned int unit_width = 4; /* strlen("unit") */ 137static unsigned int unit_width = 4; /* strlen("unit") */
125static bool forever = false; 138static bool forever = false;
126static bool metric_only = false; 139static bool metric_only = false;
140static bool force_metric_only = false;
127static struct timespec ref_time; 141static struct timespec ref_time;
128static struct cpu_map *aggr_map; 142static struct cpu_map *aggr_map;
129static aggr_get_id_t aggr_get_id; 143static aggr_get_id_t aggr_get_id;
@@ -1520,6 +1534,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
1520 return 0; 1534 return 0;
1521} 1535}
1522 1536
1537static int enable_metric_only(const struct option *opt __maybe_unused,
1538 const char *s __maybe_unused, int unset)
1539{
1540 force_metric_only = true;
1541 metric_only = !unset;
1542 return 0;
1543}
1544
1523static const struct option stat_options[] = { 1545static const struct option stat_options[] = {
1524 OPT_BOOLEAN('T', "transaction", &transaction_run, 1546 OPT_BOOLEAN('T', "transaction", &transaction_run,
1525 "hardware transaction statistics"), 1547 "hardware transaction statistics"),
@@ -1578,8 +1600,10 @@ static const struct option stat_options[] = {
1578 "aggregate counts per thread", AGGR_THREAD), 1600 "aggregate counts per thread", AGGR_THREAD),
1579 OPT_UINTEGER('D', "delay", &initial_delay, 1601 OPT_UINTEGER('D', "delay", &initial_delay,
1580 "ms to wait before starting measurement after program start"), 1602 "ms to wait before starting measurement after program start"),
1581 OPT_BOOLEAN(0, "metric-only", &metric_only, 1603 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
1582 "Only print computed metrics. No raw values"), 1604 "Only print computed metrics. No raw values", enable_metric_only),
1605 OPT_BOOLEAN(0, "topdown", &topdown_run,
1606 "measure topdown level 1 statistics"),
1583 OPT_END() 1607 OPT_END()
1584}; 1608};
1585 1609
@@ -1772,12 +1796,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
1772 return 0; 1796 return 0;
1773} 1797}
1774 1798
/*
 * Reduce the NULL-terminated event name list @attr to the events that
 * pmu_have_event() reports for the "cpu" PMU, and build an event string
 * from the names that remain.
 *
 * @attr is compacted in place: the kept names are moved to the front, in
 * their original order, and the list is NULL-terminated again.
 *
 * On success *@str points to a malloc()ed string owned by the caller:
 *   - ""        if no event was kept,
 *   - "a,b,c"   if @use_group is false,
 *   - "{a,b,c}" if @use_group is true.
 *
 * Returns 0 on success, or -1 if the allocation failed (*@str is then
 * NULL).
 */
static int topdown_filter_events(const char **attr, char **str, bool use_group)
{
	int nr_kept = 0;
	int len = 0;
	int i;
	char *buf;
	char *pos;

	/* Keep only the supported names, sliding them to the front. */
	for (i = 0; attr[i]; i++) {
		if (!pmu_have_event("cpu", attr[i]))
			continue;
		/* Name plus the separator that will follow it. */
		len += strlen(attr[i]) + 1;
		attr[nr_kept++] = attr[i];
	}
	attr[nr_kept] = NULL;

	/* Every "name," piece, the optional '{' and '}', and the NUL. */
	buf = malloc(len + 1 + 2);
	*str = buf;
	if (buf == NULL)
		return -1;

	if (nr_kept == 0) {
		buf[0] = '\0';
		return 0;
	}

	pos = buf;
	if (use_group)
		*pos++ = '{';

	for (i = 0; i < nr_kept; i++) {
		size_t name_len = strlen(attr[i]);

		memcpy(pos, attr[i], name_len);
		pos += name_len;
		*pos++ = ',';
	}

	/* The trailing ',' becomes either the closing brace or the NUL. */
	if (use_group) {
		pos[-1] = '}';
		pos[0] = '\0';
	} else {
		pos[-1] = '\0';
	}

	return 0;
}
1837
1838__weak bool arch_topdown_check_group(bool *warn)
1839{
1840 *warn = false;
1841 return false;
1842}
1843
1844__weak void arch_topdown_group_warn(void)
1845{
1846}
1847
1775/* 1848/*
1776 * Add default attributes, if there were no attributes specified or 1849 * Add default attributes, if there were no attributes specified or
1777 * if -d/--detailed, -d -d or -d -d -d is used: 1850 * if -d/--detailed, -d -d or -d -d -d is used:
1778 */ 1851 */
1779static int add_default_attributes(void) 1852static int add_default_attributes(void)
1780{ 1853{
1854 int err;
1781 struct perf_event_attr default_attrs0[] = { 1855 struct perf_event_attr default_attrs0[] = {
1782 1856
1783 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1857 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
@@ -1896,7 +1970,6 @@ static int add_default_attributes(void)
1896 return 0; 1970 return 0;
1897 1971
1898 if (transaction_run) { 1972 if (transaction_run) {
1899 int err;
1900 if (pmu_have_event("cpu", "cycles-ct") && 1973 if (pmu_have_event("cpu", "cycles-ct") &&
1901 pmu_have_event("cpu", "el-start")) 1974 pmu_have_event("cpu", "el-start"))
1902 err = parse_events(evsel_list, transaction_attrs, NULL); 1975 err = parse_events(evsel_list, transaction_attrs, NULL);
@@ -1909,6 +1982,46 @@ static int add_default_attributes(void)
1909 return 0; 1982 return 0;
1910 } 1983 }
1911 1984
1985 if (topdown_run) {
1986 char *str = NULL;
1987 bool warn = false;
1988
1989 if (stat_config.aggr_mode != AGGR_GLOBAL &&
1990 stat_config.aggr_mode != AGGR_CORE) {
1991 pr_err("top down event configuration requires --per-core mode\n");
1992 return -1;
1993 }
1994 stat_config.aggr_mode = AGGR_CORE;
1995 if (nr_cgroups || !target__has_cpu(&target)) {
1996 pr_err("top down event configuration requires system-wide mode (-a)\n");
1997 return -1;
1998 }
1999
2000 if (!force_metric_only)
2001 metric_only = true;
2002 if (topdown_filter_events(topdown_attrs, &str,
2003 arch_topdown_check_group(&warn)) < 0) {
2004 pr_err("Out of memory\n");
2005 return -1;
2006 }
2007 if (topdown_attrs[0] && str) {
2008 if (warn)
2009 arch_topdown_group_warn();
2010 err = parse_events(evsel_list, str, NULL);
2011 if (err) {
2012 fprintf(stderr,
2013 "Cannot set up top down events %s: %d\n",
2014 str, err);
2015 free(str);
2016 return -1;
2017 }
2018 } else {
2019 fprintf(stderr, "System does not support topdown\n");
2020 return -1;
2021 }
2022 free(str);
2023 }
2024
1912 if (!evsel_list->nr_entries) { 2025 if (!evsel_list->nr_entries) {
1913 if (target__has_cpu(&target)) 2026 if (target__has_cpu(&target))
1914 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2027 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;