aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2013-08-21 19:47:26 -0400
committerIngo Molnar <mingo@kernel.org>2013-10-04 04:06:07 -0400
commit4cabc3d1cb6a46f581a2628d1d11c483d5f300e5 (patch)
tree5daa1d88dd0cfa62433f7d6149f5f96c6a3fc33d /tools/perf/builtin-stat.c
parent723478c8a471403c53cf144999701f6e0c4bbd11 (diff)
tools/perf/stat: Add perf stat --transaction
Add support to perf stat to print the basic transactional execution statistics: Total cycles, Cycles in Transaction, Cycles in aborted transactions using the in_tx and in_tx_checkpoint qualifiers. Transaction Starts and Elision Starts, to compute the average transaction length. This is a reasonable overview of the success of the transactions. Also support architectures that have a transaction aborted cycles counter like POWER8. Since that is awkward to handle abstractly in the kernel, handle both cases here. Enable with a new --transaction / -T option. This requires measuring these events in a group, since they depend on each other. This is implemented by using TM sysfs events exported by the kernel. Signed-off-by: Andi Kleen <ak@linux.intel.com> Acked-by: Arnaldo Carvalho de Melo <acme@infradead.org> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1377128846-977-5-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c144
1 files changed, 143 insertions, 1 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f686d5ff594e..cc7efee9d301 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
46#include "util/util.h" 46#include "util/util.h"
47#include "util/parse-options.h" 47#include "util/parse-options.h"
48#include "util/parse-events.h" 48#include "util/parse-events.h"
49#include "util/pmu.h"
49#include "util/event.h" 50#include "util/event.h"
50#include "util/evlist.h" 51#include "util/evlist.h"
51#include "util/evsel.h" 52#include "util/evsel.h"
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70static void print_counter(struct perf_evsel *counter, char *prefix); 71static void print_counter(struct perf_evsel *counter, char *prefix);
71static void print_aggr(char *prefix); 72static void print_aggr(char *prefix);
72 73
74/* Default events used for perf stat -T */
75static const char * const transaction_attrs[] = {
76 "task-clock",
77 "{"
78 "instructions,"
79 "cycles,"
80 "cpu/cycles-t/,"
81 "cpu/tx-start/,"
82 "cpu/el-start/,"
83 "cpu/cycles-ct/"
84 "}"
85};
86
87/* More limited version when the CPU does not have all events. */
88static const char * const transaction_limited_attrs[] = {
89 "task-clock",
90 "{"
91 "instructions,"
92 "cycles,"
93 "cpu/cycles-t/,"
94 "cpu/tx-start/"
95 "}"
96};
97
98/* must match transaction_attrs and the beginning of transaction_limited_attrs */
99enum {
100 T_TASK_CLOCK,
101 T_INSTRUCTIONS,
102 T_CYCLES,
103 T_CYCLES_IN_TX,
104 T_TRANSACTION_START,
105 T_ELISION_START,
106 T_CYCLES_IN_TX_CP,
107};
108
73static struct perf_evlist *evsel_list; 109static struct perf_evlist *evsel_list;
74 110
75static struct perf_target target = { 111static struct perf_target target = {
@@ -90,6 +126,7 @@ static enum aggr_mode aggr_mode = AGGR_GLOBAL;
90static volatile pid_t child_pid = -1; 126static volatile pid_t child_pid = -1;
91static bool null_run = false; 127static bool null_run = false;
92static int detailed_run = 0; 128static int detailed_run = 0;
129static bool transaction_run;
93static bool big_num = true; 130static bool big_num = true;
94static int big_num_opt = -1; 131static int big_num_opt = -1;
95static const char *csv_sep = NULL; 132static const char *csv_sep = NULL;
@@ -214,7 +251,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
214static struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; 251static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
215static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; 252static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
216static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; 253static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
254static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
217static struct stats walltime_nsecs_stats; 255static struct stats walltime_nsecs_stats;
256static struct stats runtime_transaction_stats[MAX_NR_CPUS];
257static struct stats runtime_elision_stats[MAX_NR_CPUS];
218 258
219static void perf_stat__reset_stats(struct perf_evlist *evlist) 259static void perf_stat__reset_stats(struct perf_evlist *evlist)
220{ 260{
@@ -236,6 +276,11 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
236 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); 276 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
237 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); 277 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
238 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); 278 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
279 memset(runtime_cycles_in_tx_stats, 0,
280 sizeof(runtime_cycles_in_tx_stats));
281 memset(runtime_transaction_stats, 0,
282 sizeof(runtime_transaction_stats));
283 memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
239 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 284 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
240} 285}
241 286
@@ -274,6 +319,29 @@ static inline int nsec_counter(struct perf_evsel *evsel)
274 return 0; 319 return 0;
275} 320}
276 321
322static struct perf_evsel *nth_evsel(int n)
323{
324 static struct perf_evsel **array;
325 static int array_len;
326 struct perf_evsel *ev;
327 int j;
328
329 /* Assumes this only called when evsel_list does not change anymore. */
330 if (!array) {
331 list_for_each_entry(ev, &evsel_list->entries, node)
332 array_len++;
333 array = malloc(array_len * sizeof(void *));
334 if (!array)
335 exit(ENOMEM);
336 j = 0;
337 list_for_each_entry(ev, &evsel_list->entries, node)
338 array[j++] = ev;
339 }
340 if (n < array_len)
341 return array[n];
342 return NULL;
343}
344
277/* 345/*
278 * Update various tracking values we maintain to print 346 * Update various tracking values we maintain to print
279 * more semantic information such as miss/hit ratios, 347 * more semantic information such as miss/hit ratios,
@@ -285,6 +353,15 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
285 update_stats(&runtime_nsecs_stats[0], count[0]); 353 update_stats(&runtime_nsecs_stats[0], count[0]);
286 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 354 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
287 update_stats(&runtime_cycles_stats[0], count[0]); 355 update_stats(&runtime_cycles_stats[0], count[0]);
356 else if (transaction_run &&
357 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
358 update_stats(&runtime_cycles_in_tx_stats[0], count[0]);
359 else if (transaction_run &&
360 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
361 update_stats(&runtime_transaction_stats[0], count[0]);
362 else if (transaction_run &&
363 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
364 update_stats(&runtime_elision_stats[0], count[0]);
288 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 365 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
289 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); 366 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
290 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 367 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
@@ -827,7 +904,7 @@ static void print_ll_cache_misses(int cpu,
827 904
828static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 905static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
829{ 906{
830 double total, ratio = 0.0; 907 double total, ratio = 0.0, total2;
831 const char *fmt; 908 const char *fmt;
832 909
833 if (csv_output) 910 if (csv_output)
@@ -923,6 +1000,43 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
923 ratio = 1.0 * avg / total; 1000 ratio = 1.0 * avg / total;
924 1001
925 fprintf(output, " # %8.3f GHz ", ratio); 1002 fprintf(output, " # %8.3f GHz ", ratio);
1003 } else if (transaction_run &&
1004 perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
1005 total = avg_stats(&runtime_cycles_stats[cpu]);
1006 if (total)
1007 fprintf(output,
1008 " # %5.2f%% transactional cycles ",
1009 100.0 * (avg / total));
1010 } else if (transaction_run &&
1011 perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
1012 total = avg_stats(&runtime_cycles_stats[cpu]);
1013 total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
1014 if (total2 < avg)
1015 total2 = avg;
1016 if (total)
1017 fprintf(output,
1018 " # %5.2f%% aborted cycles ",
1019 100.0 * ((total2-avg) / total));
1020 } else if (transaction_run &&
1021 perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
1022 avg > 0 &&
1023 runtime_cycles_in_tx_stats[cpu].n != 0) {
1024 total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
1025
1026 if (total)
1027 ratio = total / avg;
1028
1029 fprintf(output, " # %8.0f cycles / transaction ", ratio);
1030 } else if (transaction_run &&
1031 perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
1032 avg > 0 &&
1033 runtime_cycles_in_tx_stats[cpu].n != 0) {
1034 total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
1035
1036 if (total)
1037 ratio = total / avg;
1038
1039 fprintf(output, " # %8.0f cycles / elision ", ratio);
926 } else if (runtime_nsecs_stats[cpu].n != 0) { 1040 } else if (runtime_nsecs_stats[cpu].n != 0) {
927 char unit = 'M'; 1041 char unit = 'M';
928 1042
@@ -1236,6 +1350,16 @@ static int perf_stat_init_aggr_mode(void)
1236 return 0; 1350 return 0;
1237} 1351}
1238 1352
1353static int setup_events(const char * const *attrs, unsigned len)
1354{
1355 unsigned i;
1356
1357 for (i = 0; i < len; i++) {
1358 if (parse_events(evsel_list, attrs[i]))
1359 return -1;
1360 }
1361 return 0;
1362}
1239 1363
1240/* 1364/*
1241 * Add default attributes, if there were no attributes specified or 1365 * Add default attributes, if there were no attributes specified or
@@ -1354,6 +1478,22 @@ static int add_default_attributes(void)
1354 if (null_run) 1478 if (null_run)
1355 return 0; 1479 return 0;
1356 1480
1481 if (transaction_run) {
1482 int err;
1483 if (pmu_have_event("cpu", "cycles-ct") &&
1484 pmu_have_event("cpu", "el-start"))
1485 err = setup_events(transaction_attrs,
1486 ARRAY_SIZE(transaction_attrs));
1487 else
1488 err = setup_events(transaction_limited_attrs,
1489 ARRAY_SIZE(transaction_limited_attrs));
1490 if (err < 0) {
1491 fprintf(stderr, "Cannot set up transaction events\n");
1492 return -1;
1493 }
1494 return 0;
1495 }
1496
1357 if (!evsel_list->nr_entries) { 1497 if (!evsel_list->nr_entries) {
1358 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) 1498 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1359 return -1; 1499 return -1;
@@ -1388,6 +1528,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1388 int output_fd = 0; 1528 int output_fd = 0;
1389 const char *output_name = NULL; 1529 const char *output_name = NULL;
1390 const struct option options[] = { 1530 const struct option options[] = {
1531 OPT_BOOLEAN('T', "transaction", &transaction_run,
1532 "hardware transaction statistics"),
1391 OPT_CALLBACK('e', "event", &evsel_list, "event", 1533 OPT_CALLBACK('e', "event", &evsel_list, "event",
1392 "event selector. use 'perf list' to list available events", 1534 "event selector. use 'perf list' to list available events",
1393 parse_events_option), 1535 parse_events_option),