aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util/stat-shadow.c
diff options
context:
space:
mode:
authorKan Liang <Kan.liang@intel.com>2017-05-26 15:05:38 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2017-06-21 10:35:35 -0400
commitdaefd0bc0bd28cea2e6b2f3e1a9da005cd4f58fc (patch)
tree81eaf64f1c5ad6f199d88322616c4c6eca157e0e /tools/perf/util/stat-shadow.c
parent3b00ea938653d136c8e4bcbe9722d954e128ce2e (diff)
perf stat: Add support to measure SMI cost
Implement a new --smi-cost mode in perf stat to measure SMI cost. During the measurement, /sys/devices/cpu/freeze_on_smi will be set. The measurement can be done with one counter (unhalted core cycles) and two free-running MSR counters (IA32_APERF and SMI_COUNT). In practice, the percentage of SMI core cycles should be more useful than the absolute value, so the output will be the percentage of SMI core cycles and SMI#. metric_only will be set by default. The percentage is computed as: SMI cycles% = (aperf - unhalted core cycles) / aperf * 100. Here is an example output: Performance counter stats for 'sudo echo ': SMI cycles% SMI# 0.1% 1 0.010858678 seconds time elapsed. Users who want to get the actual values can additionally pass --no-metric-only. Signed-off-by: Kan Liang <Kan.liang@intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: Kan Liang <kan.liang@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Robert Elliott <elliott@hpe.com> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/1495825538-5230-3-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/stat-shadow.c')
-rw-r--r--tools/perf/util/stat-shadow.c33
1 files changed, 33 insertions, 0 deletions
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ac10cc675d39..719d6cb86952 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
44static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; 44static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
45static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; 45static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
46static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; 46static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
47static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
48static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
47static struct rblist runtime_saved_values; 49static struct rblist runtime_saved_values;
48static bool have_frontend_stalled; 50static bool have_frontend_stalled;
49 51
@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
157 memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); 159 memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
158 memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); 160 memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
159 memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); 161 memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
162 memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
163 memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
160 164
161 next = rb_first(&runtime_saved_values.entries); 165 next = rb_first(&runtime_saved_values.entries);
162 while (next) { 166 while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
217 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); 221 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
218 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 222 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
219 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); 223 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
224 else if (perf_stat_evsel__is(counter, SMI_NUM))
225 update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
226 else if (perf_stat_evsel__is(counter, APERF))
227 update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
220 228
221 if (counter->collect_stat) { 229 if (counter->collect_stat) {
222 struct saved_value *v = saved_value_lookup(counter, cpu, ctx, 230 struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
592 return sanitize_val(1.0 - sum); 600 return sanitize_val(1.0 - sum);
593} 601}
594 602
/*
 * print_smi_cost - emit the "SMI cycles%" and "SMI#" metrics for one CPU.
 *
 * @cpu:   index into the per-CPU shadow-stat tables.
 * @evsel: event whose context (via evsel_context()) selects the table row.
 * @out:   output context; its print_metric callback receives both metrics.
 *
 * Reads the SMI count, APERF and cycles shadow stats for [ctx][cpu] and
 * reports the share of APERF cycles not covered by the cycles counter:
 *
 *     cost = (aperf - cycles) / aperf * 100
 *
 * NOTE(review): this presumably relies on the cycles counter being frozen
 * during SMI (freeze_on_smi) while APERF keeps running - confirm against
 * the caller that sets up --smi-cost.
 *
 * Nothing is printed when either cycles or aperf is zero.
 */
603static void print_smi_cost(int cpu, struct perf_evsel *evsel,
604 struct perf_stat_output_ctx *out)
605{
 /* cost defaults to 0.0 so that "no SMI seen" is reported as 0%. */
606 double smi_num, aperf, cycles, cost = 0.0;
607 int ctx = evsel_context(evsel);
 /* NULL colour is passed through to print_metric unless the cost is high. */
608 const char *color = NULL;
609
610 smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
611 aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
612 cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
613
 /* Bail out without output if either input is zero (also avoids dividing by a zero aperf). */
614 if ((cycles == 0) || (aperf == 0))
615 return;
616
 /* Only attribute the aperf/cycles gap to SMIs when at least one SMI was counted. */
617 if (smi_num)
618 cost = (aperf - cycles) / aperf * 100.00;
619
 /* Highlight in red when more than 10% of the cycles went to SMI handling. */
620 if (cost > 10)
621 color = PERF_COLOR_RED;
622 out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
623 out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
624}
625
595void perf_stat__print_shadow_stats(struct perf_evsel *evsel, 626void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
596 double avg, int cpu, 627 double avg, int cpu,
597 struct perf_stat_output_ctx *out) 628 struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
825 } 856 }
826 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 857 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
827 print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); 858 print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
859 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
860 print_smi_cost(cpu, evsel, out);
828 } else { 861 } else {
829 print_metric(ctxp, NULL, NULL, NULL, 0); 862 print_metric(ctxp, NULL, NULL, NULL, 0);
830 } 863 }