author		Kan Liang <Kan.liang@intel.com>	2017-05-26 15:05:38 -0400
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2017-06-21 10:35:35 -0400
commit		daefd0bc0bd28cea2e6b2f3e1a9da005cd4f58fc (patch)
tree		81eaf64f1c5ad6f199d88322616c4c6eca157e0e
parent		3b00ea938653d136c8e4bcbe9722d954e128ce2e (diff)
perf stat: Add support to measure SMI cost
Implement a new --smi-cost mode in perf stat to measure the cost of SMIs
(System Management Interrupts).

During the measurement, /sys/devices/cpu/freeze_on_smi is set so that core
counters are frozen while an SMI is being serviced.

The measurement needs one core counter (unhalted core cycles) and two
free-running MSR counters (IA32_APERF and SMI_COUNT). In practice, the
percentage of SMI core cycles is more useful than the absolute value, so the
output is the percentage of SMI core cycles plus the SMI count, and
metric_only is enabled by default:

  SMI cycles% = (aperf - unhalted core cycles) / aperf

Here is an example output:

 Performance counter stats for 'sudo echo ':

               SMI cycles%          SMI#
                      0.1%             1

       0.010858678 seconds time elapsed

Users who want the raw counter values can additionally pass
--no-metric-only.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Elliott <elliott@hpe.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1495825538-5230-3-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
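To make the formula concrete, here is a worked example (the counter values are illustrative, not taken from the run above): with freeze_on_smi set, unhalted core cycles stop counting while an SMI is serviced, but the free-running IA32_APERF does not, so a run that reads aperf = 1,000,000 and unhalted core cycles = 999,000 yields

  SMI cycles% = (1,000,000 - 999,000) / 1,000,000 = 0.1%

i.e. roughly one thousandth of the measured cycles were spent servicing SMIs.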
-rw-r--r--	tools/perf/Documentation/perf-stat.txt	14
-rw-r--r--	tools/perf/builtin-stat.c	49
-rw-r--r--	tools/perf/util/stat-shadow.c	33
-rw-r--r--	tools/perf/util/stat.c	2
-rw-r--r--	tools/perf/util/stat.h	2
5 files changed, 100 insertions(+), 0 deletions(-)
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index bd0e4417f2be..698076313606 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -239,6 +239,20 @@ taskset.
 --no-merge::
 Do not merge results from same PMUs.
 
+--smi-cost::
+Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
+
+During the measurement, /sys/devices/cpu/freeze_on_smi will be set to
+freeze core counters on SMI.
+The aperf counter will not be affected by the setting.
+The cost of SMI can be measured by (aperf - unhalted core cycles).
+
+In practice, the percentage of SMI cycles is very useful for performance
+oriented analysis. --metric-only will be applied by default.
+The output is SMI cycles%, which equals (aperf - unhalted core cycles) / aperf.
+
+Users who want to get the actual values can apply --no-metric-only.
+
 EXAMPLES
 --------
 
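The documentation above translates into an invocation along these lines (a sketch only: the -a flag and the sleep workload are illustrative choices, not part of this patch, and root privileges are assumed since writing freeze_on_smi and counting system-wide generally require them):

  # measure SMI cycles% and SMI# system-wide for one second
  sudo perf stat --smi-cost -a -- sleep 1

Adding --no-metric-only on top should print the raw msr/aperf/, msr/smi/ and cycles counts instead of only the derived metric.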
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ad9324d1daf9..324363054c3f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -86,6 +86,7 @@
 #define DEFAULT_SEPARATOR	" "
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
+#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
 
 static void print_counters(struct timespec *ts, int argc, const char **argv);
 
@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
 	NULL,
 };
 
+static const char *smi_cost_attrs = {
+	"{"
+	"msr/aperf/,"
+	"msr/smi/,"
+	"cycles"
+	"}"
+};
+
 static struct perf_evlist	*evsel_list;
 
 static struct target target = {
@@ -137,6 +146,8 @@ static bool null_run = false;
 static int		detailed_run		= 0;
 static bool		transaction_run;
 static bool		topdown_run		= false;
+static bool		smi_cost		= false;
+static bool		smi_reset		= false;
 static bool		big_num			= true;
 static int		big_num_opt		= -1;
 static const char	*csv_sep		= NULL;
@@ -1782,6 +1793,8 @@ static const struct option stat_options[] = {
 		    "Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 			"measure topdown level 1 statistics"),
+	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
+			"measure SMI cost"),
 	OPT_END()
 };
 
@@ -2160,6 +2173,39 @@ static int add_default_attributes(void)
 		return 0;
 	}
 
+	if (smi_cost) {
+		int smi;
+
+		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
+			fprintf(stderr, "freeze_on_smi is not supported.\n");
+			return -1;
+		}
+
+		if (!smi) {
+			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
+				fprintf(stderr, "Failed to set freeze_on_smi.\n");
+				return -1;
+			}
+			smi_reset = true;
+		}
+
+		if (pmu_have_event("msr", "aperf") &&
+		    pmu_have_event("msr", "smi")) {
+			if (!force_metric_only)
+				metric_only = true;
+			err = parse_events(evsel_list, smi_cost_attrs, NULL);
+		} else {
+			fprintf(stderr, "To measure SMI cost, it needs "
+				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+			return -1;
+		}
+		if (err) {
+			fprintf(stderr, "Cannot set up SMI cost events\n");
+			return -1;
+		}
+		return 0;
+	}
+
 	if (topdown_run) {
 		char *str = NULL;
 		bool warn = false;
@@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
+	if (smi_cost && smi_reset)
+		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
+
 	perf_evlist__delete(evsel_list);
 	return status;
 }
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ac10cc675d39..719d6cb86952 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
 static struct rblist runtime_saved_values;
 static bool have_frontend_stalled;
 
@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
 	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
 	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
 	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
+	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
+	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
 
 	next = rb_first(&runtime_saved_values.entries);
 	while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
 		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, SMI_NUM))
+		update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, APERF))
+		update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
 
 	if (counter->collect_stat) {
 		struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
 	return sanitize_val(1.0 - sum);
 }
 
+static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+			   struct perf_stat_output_ctx *out)
+{
+	double smi_num, aperf, cycles, cost = 0.0;
+	int ctx = evsel_context(evsel);
+	const char *color = NULL;
+
+	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
+	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
+	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if ((cycles == 0) || (aperf == 0))
+		return;
+
+	if (smi_num)
+		cost = (aperf - cycles) / aperf * 100.00;
+
+	if (cost > 10)
+		color = PERF_COLOR_RED;
+	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		}
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
 		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+		print_smi_cost(cpu, evsel, out);
 	} else {
 		print_metric(ctxp, NULL, NULL, NULL, 0);
 	}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c58174443dc1..53b9a994a3dc 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
 	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
 	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
 	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(SMI_NUM, msr/smi/),
+	ID(APERF, msr/aperf/),
 };
 #undef ID
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0a65ae23f495..7522bf10b03e 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
 	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
 	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+	PERF_STAT_EVSEL_ID__SMI_NUM,
+	PERF_STAT_EVSEL_ID__APERF,
 	PERF_STAT_EVSEL_ID__MAX,
 };
 