aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andi Kleen <ak@linux.intel.com> 2013-08-21 19:47:26 -0400
committer Ingo Molnar <mingo@kernel.org> 2013-10-04 04:06:07 -0400
commit 4cabc3d1cb6a46f581a2628d1d11c483d5f300e5 (patch)
tree 5daa1d88dd0cfa62433f7d6149f5f96c6a3fc33d
parent 723478c8a471403c53cf144999701f6e0c4bbd11 (diff)
tools/perf/stat: Add perf stat --transaction
Add support to perf stat to print the basic transactional execution statistics: total cycles, cycles in transaction, and cycles in aborted transactions (using the in_tx and in_tx_checkpoint qualifiers), plus transaction starts and elision starts, which are used to compute the average transaction length. This gives a reasonable overview of the success of the transactions. Also support architectures that have a transaction aborted cycles counter, like POWER8. Since that is awkward to handle in the kernel abstraction, handle both cases here. Enable with a new --transaction / -T option. This requires measuring these events in a group, since they depend on each other. This is implemented by using the TM sysfs events exported by the kernel. Signed-off-by: Andi Kleen <ak@linux.intel.com> Acked-by: Arnaldo Carvalho de Melo <acme@infradead.org> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1377128846-977-5-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- tools/perf/Documentation/perf-stat.txt 5
-rw-r--r-- tools/perf/builtin-stat.c 144
-rw-r--r-- tools/perf/util/evsel.h 6
-rw-r--r-- tools/perf/util/pmu.c 16
-rw-r--r-- tools/perf/util/pmu.h 1
5 files changed, 171 insertions, 1 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 73c9759005a3..80c7da6732f2 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -137,6 +137,11 @@ core number and the number of online logical processors on that physical process
137After starting the program, wait msecs before measuring. This is useful to 137After starting the program, wait msecs before measuring. This is useful to
138filter out the startup phase of the program, which is often very different. 138filter out the startup phase of the program, which is often very different.
139 139
140-T::
141--transaction::
142
143Print statistics of transactional execution if supported.
144
140EXAMPLES 145EXAMPLES
141-------- 146--------
142 147
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f686d5ff594e..cc7efee9d301 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
46#include "util/util.h" 46#include "util/util.h"
47#include "util/parse-options.h" 47#include "util/parse-options.h"
48#include "util/parse-events.h" 48#include "util/parse-events.h"
49#include "util/pmu.h"
49#include "util/event.h" 50#include "util/event.h"
50#include "util/evlist.h" 51#include "util/evlist.h"
51#include "util/evsel.h" 52#include "util/evsel.h"
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70static void print_counter(struct perf_evsel *counter, char *prefix); 71static void print_counter(struct perf_evsel *counter, char *prefix);
71static void print_aggr(char *prefix); 72static void print_aggr(char *prefix);
72 73
74/* Default events used for perf stat -T */
75static const char * const transaction_attrs[] = {
76 "task-clock",
77 "{"
78 "instructions,"
79 "cycles,"
80 "cpu/cycles-t/,"
81 "cpu/tx-start/,"
82 "cpu/el-start/,"
83 "cpu/cycles-ct/"
84 "}"
85};
86
87/* More limited version when the CPU does not have all events. */
88static const char * const transaction_limited_attrs[] = {
89 "task-clock",
90 "{"
91 "instructions,"
92 "cycles,"
93 "cpu/cycles-t/,"
94 "cpu/tx-start/"
95 "}"
96};
97
98/* must match transaction_attrs and the beginning limited_attrs */
99enum {
100 T_TASK_CLOCK,
101 T_INSTRUCTIONS,
102 T_CYCLES,
103 T_CYCLES_IN_TX,
104 T_TRANSACTION_START,
105 T_ELISION_START,
106 T_CYCLES_IN_TX_CP,
107};
108
73static struct perf_evlist *evsel_list; 109static struct perf_evlist *evsel_list;
74 110
75static struct perf_target target = { 111static struct perf_target target = {
@@ -90,6 +126,7 @@ static enum aggr_mode aggr_mode = AGGR_GLOBAL;
90static volatile pid_t child_pid = -1; 126static volatile pid_t child_pid = -1;
91static bool null_run = false; 127static bool null_run = false;
92static int detailed_run = 0; 128static int detailed_run = 0;
129static bool transaction_run;
93static bool big_num = true; 130static bool big_num = true;
94static int big_num_opt = -1; 131static int big_num_opt = -1;
95static const char *csv_sep = NULL; 132static const char *csv_sep = NULL;
@@ -214,7 +251,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
214static struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; 251static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
215static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; 252static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
216static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; 253static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
254static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
217static struct stats walltime_nsecs_stats; 255static struct stats walltime_nsecs_stats;
256static struct stats runtime_transaction_stats[MAX_NR_CPUS];
257static struct stats runtime_elision_stats[MAX_NR_CPUS];
218 258
219static void perf_stat__reset_stats(struct perf_evlist *evlist) 259static void perf_stat__reset_stats(struct perf_evlist *evlist)
220{ 260{
@@ -236,6 +276,11 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
236 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); 276 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
237 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); 277 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
238 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); 278 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
279 memset(runtime_cycles_in_tx_stats, 0,
280 sizeof(runtime_cycles_in_tx_stats));
281 memset(runtime_transaction_stats, 0,
282 sizeof(runtime_transaction_stats));
283 memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
239 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 284 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
240} 285}
241 286
@@ -274,6 +319,29 @@ static inline int nsec_counter(struct perf_evsel *evsel)
274 return 0; 319 return 0;
275} 320}
276 321
322static struct perf_evsel *nth_evsel(int n)
323{
324 static struct perf_evsel **array;
325 static int array_len;
326 struct perf_evsel *ev;
327 int j;
328
329 /* Assumes this only called when evsel_list does not change anymore. */
330 if (!array) {
331 list_for_each_entry(ev, &evsel_list->entries, node)
332 array_len++;
333 array = malloc(array_len * sizeof(void *));
334 if (!array)
335 exit(ENOMEM);
336 j = 0;
337 list_for_each_entry(ev, &evsel_list->entries, node)
338 array[j++] = ev;
339 }
340 if (n < array_len)
341 return array[n];
342 return NULL;
343}
344
277/* 345/*
278 * Update various tracking values we maintain to print 346 * Update various tracking values we maintain to print
279 * more semantic information such as miss/hit ratios, 347 * more semantic information such as miss/hit ratios,
@@ -285,6 +353,15 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
285 update_stats(&runtime_nsecs_stats[0], count[0]); 353 update_stats(&runtime_nsecs_stats[0], count[0]);
286 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 354 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
287 update_stats(&runtime_cycles_stats[0], count[0]); 355 update_stats(&runtime_cycles_stats[0], count[0]);
356 else if (transaction_run &&
357 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
358 update_stats(&runtime_cycles_in_tx_stats[0], count[0]);
359 else if (transaction_run &&
360 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
361 update_stats(&runtime_transaction_stats[0], count[0]);
362 else if (transaction_run &&
363 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
364 update_stats(&runtime_elision_stats[0], count[0]);
288 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 365 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
289 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); 366 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
290 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 367 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
@@ -827,7 +904,7 @@ static void print_ll_cache_misses(int cpu,
827 904
828static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 905static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
829{ 906{
830 double total, ratio = 0.0; 907 double total, ratio = 0.0, total2;
831 const char *fmt; 908 const char *fmt;
832 909
833 if (csv_output) 910 if (csv_output)
@@ -923,6 +1000,43 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
923 ratio = 1.0 * avg / total; 1000 ratio = 1.0 * avg / total;
924 1001
925 fprintf(output, " # %8.3f GHz ", ratio); 1002 fprintf(output, " # %8.3f GHz ", ratio);
1003 } else if (transaction_run &&
1004 perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
1005 total = avg_stats(&runtime_cycles_stats[cpu]);
1006 if (total)
1007 fprintf(output,
1008 " # %5.2f%% transactional cycles ",
1009 100.0 * (avg / total));
1010 } else if (transaction_run &&
1011 perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
1012 total = avg_stats(&runtime_cycles_stats[cpu]);
1013 total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
1014 if (total2 < avg)
1015 total2 = avg;
1016 if (total)
1017 fprintf(output,
1018 " # %5.2f%% aborted cycles ",
1019 100.0 * ((total2-avg) / total));
1020 } else if (transaction_run &&
1021 perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
1022 avg > 0 &&
1023 runtime_cycles_in_tx_stats[cpu].n != 0) {
1024 total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
1025
1026 if (total)
1027 ratio = total / avg;
1028
1029 fprintf(output, " # %8.0f cycles / transaction ", ratio);
1030 } else if (transaction_run &&
1031 perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
1032 avg > 0 &&
1033 runtime_cycles_in_tx_stats[cpu].n != 0) {
1034 total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
1035
1036 if (total)
1037 ratio = total / avg;
1038
1039 fprintf(output, " # %8.0f cycles / elision ", ratio);
926 } else if (runtime_nsecs_stats[cpu].n != 0) { 1040 } else if (runtime_nsecs_stats[cpu].n != 0) {
927 char unit = 'M'; 1041 char unit = 'M';
928 1042
@@ -1236,6 +1350,16 @@ static int perf_stat_init_aggr_mode(void)
1236 return 0; 1350 return 0;
1237} 1351}
1238 1352
1353static int setup_events(const char * const *attrs, unsigned len)
1354{
1355 unsigned i;
1356
1357 for (i = 0; i < len; i++) {
1358 if (parse_events(evsel_list, attrs[i]))
1359 return -1;
1360 }
1361 return 0;
1362}
1239 1363
1240/* 1364/*
1241 * Add default attributes, if there were no attributes specified or 1365 * Add default attributes, if there were no attributes specified or
@@ -1354,6 +1478,22 @@ static int add_default_attributes(void)
1354 if (null_run) 1478 if (null_run)
1355 return 0; 1479 return 0;
1356 1480
1481 if (transaction_run) {
1482 int err;
1483 if (pmu_have_event("cpu", "cycles-ct") &&
1484 pmu_have_event("cpu", "el-start"))
1485 err = setup_events(transaction_attrs,
1486 ARRAY_SIZE(transaction_attrs));
1487 else
1488 err = setup_events(transaction_limited_attrs,
1489 ARRAY_SIZE(transaction_limited_attrs));
1490 if (err < 0) {
1491 fprintf(stderr, "Cannot set up transaction events\n");
1492 return -1;
1493 }
1494 return 0;
1495 }
1496
1357 if (!evsel_list->nr_entries) { 1497 if (!evsel_list->nr_entries) {
1358 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) 1498 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1359 return -1; 1499 return -1;
@@ -1388,6 +1528,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1388 int output_fd = 0; 1528 int output_fd = 0;
1389 const char *output_name = NULL; 1529 const char *output_name = NULL;
1390 const struct option options[] = { 1530 const struct option options[] = {
1531 OPT_BOOLEAN('T', "transaction", &transaction_run,
1532 "hardware transaction statistics"),
1391 OPT_CALLBACK('e', "event", &evsel_list, "event", 1533 OPT_CALLBACK('e', "event", &evsel_list, "event",
1392 "event selector. use 'perf list' to list available events", 1534 "event selector. use 'perf list' to list available events",
1393 parse_events_option), 1535 parse_events_option),
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4a7bdc713bab..5aa68cddc7d9 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -197,6 +197,12 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1,
197 (e1->attr.config == e2->attr.config); 197 (e1->attr.config == e2->attr.config);
198} 198}
199 199
200#define perf_evsel__cmp(a, b) \
201 ((a) && \
202 (b) && \
203 (a)->attr.type == (b)->attr.type && \
204 (a)->attr.config == (b)->attr.config)
205
200int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, 206int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
201 int cpu, int thread, bool scale); 207 int cpu, int thread, bool scale);
202 208
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index bc9d8069d376..64362fe45b71 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -637,3 +637,19 @@ void print_pmu_events(const char *event_glob, bool name_only)
637 printf("\n"); 637 printf("\n");
638 free(aliases); 638 free(aliases);
639} 639}
640
641bool pmu_have_event(const char *pname, const char *name)
642{
643 struct perf_pmu *pmu;
644 struct perf_pmu_alias *alias;
645
646 pmu = NULL;
647 while ((pmu = perf_pmu__scan(pmu)) != NULL) {
648 if (strcmp(pname, pmu->name))
649 continue;
650 list_for_each_entry(alias, &pmu->aliases, list)
651 if (!strcmp(alias->name, name))
652 return true;
653 }
654 return false;
655}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 6b2cbe2d4cc3..1179b26f244a 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -42,6 +42,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);
42struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); 42struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
43 43
44void print_pmu_events(const char *event_glob, bool name_only); 44void print_pmu_events(const char *event_glob, bool name_only);
45bool pmu_have_event(const char *pname, const char *name);
45 46
46int perf_pmu__test(void); 47int perf_pmu__test(void);
47#endif /* __PMU_H */ 48#endif /* __PMU_H */