author     Kan Liang <Kan.liang@intel.com>             2017-05-26 15:05:38 -0400
committer  Arnaldo Carvalho de Melo <acme@redhat.com>  2017-06-21 10:35:35 -0400
commit     daefd0bc0bd28cea2e6b2f3e1a9da005cd4f58fc
tree       81eaf64f1c5ad6f199d88322616c4c6eca157e0e
parent     3b00ea938653d136c8e4bcbe9722d954e128ce2e
perf stat: Add support to measure SMI cost
Implement a new --smi-cost mode in perf stat to measure the cost of SMIs
(System Management Interrupts).

During the measurement, /sys/devices/cpu/freeze_on_smi will be set.

The measurement can be done with one core counter (unhalted core cycles)
and two free-running MSR counters (IA32_APERF and SMI_COUNT).

In practice, the percentage of SMI core cycles is more useful than the
absolute value, so the output is the percentage of SMI core cycles plus
the number of SMIs (SMI#). metric_only is set by default.

SMI cycles% = (aperf - unhalted core cycles) / aperf

Here is an example of the output:
Performance counter stats for 'sudo echo ':
SMI cycles% SMI#
0.1% 1
0.010858678 seconds time elapsed
Users who want the actual counter values can additionally pass
--no-metric-only.
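
As a worked example of the metric above (this snippet is not part of the
patch; the counter values are hypothetical, chosen to reproduce the 0.1%
in the sample output): with freeze_on_smi set, unhalted core cycles stop
counting during SMIs while IA32_APERF keeps running, so the gap between
the two is the time spent handling SMIs.

/*
 * Worked example with hypothetical counts, not taken from a real run:
 * how "SMI cycles%" is derived from aperf and unhalted core cycles.
 */
#include <stdio.h>

int main(void)
{
	double aperf  = 1000000.0;  /* msr/aperf/: keeps counting during SMIs */
	double cycles =  999000.0;  /* unhalted core cycles: frozen on SMI */
	double smi_cycles_pct = (aperf - cycles) / aperf * 100.0;

	printf("SMI cycles%%: %.1f%%\n", smi_cycles_pct);  /* prints 0.1% */
	return 0;
}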
Signed-off-by: Kan Liang <Kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Elliott <elliott@hpe.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1495825538-5230-3-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--   tools/perf/Documentation/perf-stat.txt | 14
-rw-r--r--   tools/perf/builtin-stat.c              | 49
-rw-r--r--   tools/perf/util/stat-shadow.c          | 33
-rw-r--r--   tools/perf/util/stat.c                 |  2
-rw-r--r--   tools/perf/util/stat.h                 |  2
5 files changed, 100 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index bd0e4417f2be..698076313606 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -239,6 +239,20 @@ taskset.
 --no-merge::
 Do not merge results from same PMUs.
 
+--smi-cost::
+Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
+
+During the measurement, the /sys/device/cpu/freeze_on_smi will be set to
+freeze core counters on SMI.
+The aperf counter will not be effected by the setting.
+The cost of SMI can be measured by (aperf - unhalted core cycles).
+
+In practice, the percentages of SMI cycles is very useful for performance
+oriented analysis. --metric_only will be applied by default.
+The output is SMI cycles%, equals to (aperf - unhalted core cycles) / aperf
+
+Users who wants to get the actual value can apply --no-metric-only.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ad9324d1daf9..324363054c3f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -86,6 +86,7 @@
 #define DEFAULT_SEPARATOR " "
 #define CNTR_NOT_SUPPORTED "<not supported>"
 #define CNTR_NOT_COUNTED "<not counted>"
+#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
 
 static void print_counters(struct timespec *ts, int argc, const char **argv);
 
@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
 	NULL,
 };
 
+static const char *smi_cost_attrs = {
+	"{"
+	"msr/aperf/,"
+	"msr/smi/,"
+	"cycles"
+	"}"
+};
+
 static struct perf_evlist *evsel_list;
 
 static struct target target = {
@@ -137,6 +146,8 @@ static bool null_run = false;
 static int detailed_run = 0;
 static bool transaction_run;
 static bool topdown_run = false;
+static bool smi_cost = false;
+static bool smi_reset = false;
 static bool big_num = true;
 static int big_num_opt = -1;
 static const char *csv_sep = NULL;
@@ -1782,6 +1793,8 @@ static const struct option stat_options[] = {
 		    "Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 		    "measure topdown level 1 statistics"),
+	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
+		    "measure SMI cost"),
 	OPT_END()
 };
 
@@ -2160,6 +2173,39 @@ static int add_default_attributes(void)
 		return 0;
 	}
 
+	if (smi_cost) {
+		int smi;
+
+		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
+			fprintf(stderr, "freeze_on_smi is not supported.\n");
+			return -1;
+		}
+
+		if (!smi) {
+			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
+				fprintf(stderr, "Failed to set freeze_on_smi.\n");
+				return -1;
+			}
+			smi_reset = true;
+		}
+
+		if (pmu_have_event("msr", "aperf") &&
+		    pmu_have_event("msr", "smi")) {
+			if (!force_metric_only)
+				metric_only = true;
+			err = parse_events(evsel_list, smi_cost_attrs, NULL);
+		} else {
+			fprintf(stderr, "To measure SMI cost, it needs "
+				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+			return -1;
+		}
+		if (err) {
+			fprintf(stderr, "Cannot set up SMI cost events\n");
+			return -1;
+		}
+		return 0;
+	}
+
 	if (topdown_run) {
 		char *str = NULL;
 		bool warn = false;
@@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
+	if (smi_cost && smi_reset)
+		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
+
 	perf_evlist__delete(evsel_list);
 	return status;
 }
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ac10cc675d39..719d6cb86952 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
 static struct rblist runtime_saved_values;
 static bool have_frontend_stalled;
 
@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
 	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
 	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
 	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
+	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
+	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
 
 	next = rb_first(&runtime_saved_values.entries);
 	while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
 		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, SMI_NUM))
+		update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, APERF))
+		update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
 
 	if (counter->collect_stat) {
 		struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
 	return sanitize_val(1.0 - sum);
 }
 
+static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+			   struct perf_stat_output_ctx *out)
+{
+	double smi_num, aperf, cycles, cost = 0.0;
+	int ctx = evsel_context(evsel);
+	const char *color = NULL;
+
+	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
+	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
+	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if ((cycles == 0) || (aperf == 0))
+		return;
+
+	if (smi_num)
+		cost = (aperf - cycles) / aperf * 100.00;
+
+	if (cost > 10)
+		color = PERF_COLOR_RED;
+	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		}
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
 		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+		print_smi_cost(cpu, evsel, out);
 	} else {
 		print_metric(ctxp, NULL, NULL, NULL, 0);
 	}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c58174443dc1..53b9a994a3dc 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
 	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
 	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
 	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(SMI_NUM, msr/smi/),
+	ID(APERF, msr/aperf/),
 };
 #undef ID
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0a65ae23f495..7522bf10b03e 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
 	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
 	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+	PERF_STAT_EVSEL_ID__SMI_NUM,
+	PERF_STAT_EVSEL_ID__APERF,
 	PERF_STAT_EVSEL_ID__MAX,
 };
 
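
For readers who want to experiment with the kernel knob this patch relies on
outside of perf, here is a minimal standalone sketch (not part of the patch)
of the same save/set/restore pattern that add_default_attributes() and the
out: path above implement. It uses plain stdio rather than perf's sysfs
helpers, assumes the attribute is exposed at /sys/devices/cpu/freeze_on_smi
(the sysfs-relative path behind FREEZE_ON_SMI_PATH), and needs root to write
it.

/*
 * Standalone sketch: save, set and restore freeze_on_smi around a
 * measurement window, mirroring what perf stat --smi-cost does.
 */
#include <stdio.h>

#define FREEZE_ON_SMI "/sys/devices/cpu/freeze_on_smi"

static int read_knob(int *val)
{
	FILE *f = fopen(FREEZE_ON_SMI, "r");

	if (!f)
		return -1;
	if (fscanf(f, "%d", val) != 1)
		*val = -1;
	fclose(f);
	return *val < 0 ? -1 : 0;
}

static int write_knob(int val)
{
	FILE *f = fopen(FREEZE_ON_SMI, "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", val);
	return fclose(f);
}

int main(void)
{
	int old;

	if (read_knob(&old)) {
		fprintf(stderr, "freeze_on_smi is not supported.\n");
		return 1;
	}
	if (!old && write_knob(1))
		return 1;

	/* ... run the measurement here ... */

	if (!old)
		write_knob(0);	/* restore the original setting */
	return 0;
}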