diff options
author | Andi Kleen <ak@linux.intel.com> | 2013-08-21 19:47:26 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-10-04 04:06:07 -0400 |
commit | 4cabc3d1cb6a46f581a2628d1d11c483d5f300e5 (patch) | |
tree | 5daa1d88dd0cfa62433f7d6149f5f96c6a3fc33d | |
parent | 723478c8a471403c53cf144999701f6e0c4bbd11 (diff) |
tools/perf/stat: Add perf stat --transaction
Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction
length.
This is a reasonable overview over the success of the transactions.
Also support architectures that have a transaction aborted cycles
counter like POWER8. Since that is awkward to handle abstractly in the
kernel, handle both cases here.
Enable with a new --transaction / -T option.
This requires measuring these events in a group, since they depend on each
other.
This is implemented by using TM sysfs events exported by the kernel.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnaldo Carvalho de Melo <acme@infradead.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1377128846-977-5-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 5 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 144 | ||||
-rw-r--r-- | tools/perf/util/evsel.h | 6 | ||||
-rw-r--r-- | tools/perf/util/pmu.c | 16 | ||||
-rw-r--r-- | tools/perf/util/pmu.h | 1 |
5 files changed, 171 insertions, 1 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 73c9759005a3..80c7da6732f2 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -137,6 +137,11 @@ core number and the number of online logical processors on that physical process | |||
137 | After starting the program, wait msecs before measuring. This is useful to | 137 | After starting the program, wait msecs before measuring. This is useful to |
138 | filter out the startup phase of the program, which is often very different. | 138 | filter out the startup phase of the program, which is often very different. |
139 | 139 | ||
140 | -T:: | ||
141 | --transaction:: | ||
142 | |||
143 | Print statistics of transactional execution if supported. | ||
144 | |||
140 | EXAMPLES | 145 | EXAMPLES |
141 | -------- | 146 | -------- |
142 | 147 | ||
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f686d5ff594e..cc7efee9d301 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "util/util.h" | 46 | #include "util/util.h" |
47 | #include "util/parse-options.h" | 47 | #include "util/parse-options.h" |
48 | #include "util/parse-events.h" | 48 | #include "util/parse-events.h" |
49 | #include "util/pmu.h" | ||
49 | #include "util/event.h" | 50 | #include "util/event.h" |
50 | #include "util/evlist.h" | 51 | #include "util/evlist.h" |
51 | #include "util/evsel.h" | 52 | #include "util/evsel.h" |
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix); | |||
70 | static void print_counter(struct perf_evsel *counter, char *prefix); | 71 | static void print_counter(struct perf_evsel *counter, char *prefix); |
71 | static void print_aggr(char *prefix); | 72 | static void print_aggr(char *prefix); |
72 | 73 | ||
74 | /* Default events used for perf stat -T. Adjacent string literals are concatenated by the compiler, so this array has two entries: task-clock and one brace-delimited event group. */ | ||
75 | static const char * const transaction_attrs[] = { | ||
76 | "task-clock", | ||
77 | "{" | ||
78 | "instructions," | ||
79 | "cycles," | ||
80 | "cpu/cycles-t/," | ||
81 | "cpu/tx-start/," | ||
82 | "cpu/el-start/," | ||
83 | "cpu/cycles-ct/" | ||
84 | "}" | ||
85 | }; | ||
86 | |||
87 | /* More limited version when the CPU does not have all events: used when the cpu PMU lacks the cycles-ct or el-start event. */ | ||
88 | static const char * const transaction_limited_attrs[] = { | ||
89 | "task-clock", | ||
90 | "{" | ||
91 | "instructions," | ||
92 | "cycles," | ||
93 | "cpu/cycles-t/," | ||
94 | "cpu/tx-start/" | ||
95 | "}" | ||
96 | }; | ||
97 | |||
98 | /* Indices passed to nth_evsel() to find each -T event in evsel_list; must match the event order in transaction_attrs and the beginning of transaction_limited_attrs */ | ||
99 | enum { | ||
100 | T_TASK_CLOCK, | ||
101 | T_INSTRUCTIONS, | ||
102 | T_CYCLES, | ||
103 | T_CYCLES_IN_TX, | ||
104 | T_TRANSACTION_START, | ||
105 | T_ELISION_START, | ||
106 | T_CYCLES_IN_TX_CP, | ||
107 | }; | ||
108 | |||
73 | static struct perf_evlist *evsel_list; | 109 | static struct perf_evlist *evsel_list; |
74 | 110 | ||
75 | static struct perf_target target = { | 111 | static struct perf_target target = { |
@@ -90,6 +126,7 @@ static enum aggr_mode aggr_mode = AGGR_GLOBAL; | |||
90 | static volatile pid_t child_pid = -1; | 126 | static volatile pid_t child_pid = -1; |
91 | static bool null_run = false; | 127 | static bool null_run = false; |
92 | static int detailed_run = 0; | 128 | static int detailed_run = 0; |
129 | static bool transaction_run; | ||
93 | static bool big_num = true; | 130 | static bool big_num = true; |
94 | static int big_num_opt = -1; | 131 | static int big_num_opt = -1; |
95 | static const char *csv_sep = NULL; | 132 | static const char *csv_sep = NULL; |
@@ -214,7 +251,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; | |||
214 | static struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; | 251 | static struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; |
215 | static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; | 252 | static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; |
216 | static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; | 253 | static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; |
254 | static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS]; | ||
217 | static struct stats walltime_nsecs_stats; | 255 | static struct stats walltime_nsecs_stats; |
256 | static struct stats runtime_transaction_stats[MAX_NR_CPUS]; | ||
257 | static struct stats runtime_elision_stats[MAX_NR_CPUS]; | ||
218 | 258 | ||
219 | static void perf_stat__reset_stats(struct perf_evlist *evlist) | 259 | static void perf_stat__reset_stats(struct perf_evlist *evlist) |
220 | { | 260 | { |
@@ -236,6 +276,11 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist) | |||
236 | memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); | 276 | memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); |
237 | memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); | 277 | memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); |
238 | memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); | 278 | memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); |
279 | memset(runtime_cycles_in_tx_stats, 0, | ||
280 | sizeof(runtime_cycles_in_tx_stats)); | ||
281 | memset(runtime_transaction_stats, 0, | ||
282 | sizeof(runtime_transaction_stats)); | ||
283 | memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); | ||
239 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); | 284 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); |
240 | } | 285 | } |
241 | 286 | ||
@@ -274,6 +319,29 @@ static inline int nsec_counter(struct perf_evsel *evsel) | |||
274 | return 0; | 319 | return 0; |
275 | } | 320 | } |
276 | 321 | ||
322 | static struct perf_evsel *nth_evsel(int n) | ||
323 | { | ||
324 | static struct perf_evsel **array; | ||
325 | static int array_len; | ||
326 | struct perf_evsel *ev; | ||
327 | int j; | ||
328 | |||
329 | /* Assumes this only called when evsel_list does not change anymore. */ | ||
330 | if (!array) { | ||
331 | list_for_each_entry(ev, &evsel_list->entries, node) | ||
332 | array_len++; | ||
333 | array = malloc(array_len * sizeof(void *)); | ||
334 | if (!array) | ||
335 | exit(ENOMEM); | ||
336 | j = 0; | ||
337 | list_for_each_entry(ev, &evsel_list->entries, node) | ||
338 | array[j++] = ev; | ||
339 | } | ||
340 | if (n < array_len) | ||
341 | return array[n]; | ||
342 | return NULL; | ||
343 | } | ||
344 | |||
277 | /* | 345 | /* |
278 | * Update various tracking values we maintain to print | 346 | * Update various tracking values we maintain to print |
279 | * more semantic information such as miss/hit ratios, | 347 | * more semantic information such as miss/hit ratios, |
@@ -285,6 +353,15 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) | |||
285 | update_stats(&runtime_nsecs_stats[0], count[0]); | 353 | update_stats(&runtime_nsecs_stats[0], count[0]); |
286 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | 354 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) |
287 | update_stats(&runtime_cycles_stats[0], count[0]); | 355 | update_stats(&runtime_cycles_stats[0], count[0]); |
356 | else if (transaction_run && | ||
357 | perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX))) | ||
358 | update_stats(&runtime_cycles_in_tx_stats[0], count[0]); | ||
359 | else if (transaction_run && | ||
360 | perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START))) | ||
361 | update_stats(&runtime_transaction_stats[0], count[0]); | ||
362 | else if (transaction_run && | ||
363 | perf_evsel__cmp(counter, nth_evsel(T_ELISION_START))) | ||
364 | update_stats(&runtime_elision_stats[0], count[0]); | ||
288 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | 365 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) |
289 | update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); | 366 | update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); |
290 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | 367 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) |
@@ -827,7 +904,7 @@ static void print_ll_cache_misses(int cpu, | |||
827 | 904 | ||
828 | static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) | 905 | static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) |
829 | { | 906 | { |
830 | double total, ratio = 0.0; | 907 | double total, ratio = 0.0, total2; |
831 | const char *fmt; | 908 | const char *fmt; |
832 | 909 | ||
833 | if (csv_output) | 910 | if (csv_output) |
@@ -923,6 +1000,43 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) | |||
923 | ratio = 1.0 * avg / total; | 1000 | ratio = 1.0 * avg / total; |
924 | 1001 | ||
925 | fprintf(output, " # %8.3f GHz ", ratio); | 1002 | fprintf(output, " # %8.3f GHz ", ratio); |
1003 | } else if (transaction_run && | ||
1004 | perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) { | ||
1005 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
1006 | if (total) | ||
1007 | fprintf(output, | ||
1008 | " # %5.2f%% transactional cycles ", | ||
1009 | 100.0 * (avg / total)); | ||
1010 | } else if (transaction_run && | ||
1011 | perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) { | ||
1012 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
1013 | total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]); | ||
1014 | if (total2 < avg) | ||
1015 | total2 = avg; | ||
1016 | if (total) | ||
1017 | fprintf(output, | ||
1018 | " # %5.2f%% aborted cycles ", | ||
1019 | 100.0 * ((total2-avg) / total)); | ||
1020 | } else if (transaction_run && | ||
1021 | perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) && | ||
1022 | avg > 0 && | ||
1023 | runtime_cycles_in_tx_stats[cpu].n != 0) { | ||
1024 | total = avg_stats(&runtime_cycles_in_tx_stats[cpu]); | ||
1025 | |||
1026 | if (total) | ||
1027 | ratio = total / avg; | ||
1028 | |||
1029 | fprintf(output, " # %8.0f cycles / transaction ", ratio); | ||
1030 | } else if (transaction_run && | ||
1031 | perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) && | ||
1032 | avg > 0 && | ||
1033 | runtime_cycles_in_tx_stats[cpu].n != 0) { | ||
1034 | total = avg_stats(&runtime_cycles_in_tx_stats[cpu]); | ||
1035 | |||
1036 | if (total) | ||
1037 | ratio = total / avg; | ||
1038 | |||
1039 | fprintf(output, " # %8.0f cycles / elision ", ratio); | ||
926 | } else if (runtime_nsecs_stats[cpu].n != 0) { | 1040 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
927 | char unit = 'M'; | 1041 | char unit = 'M'; |
928 | 1042 | ||
@@ -1236,6 +1350,16 @@ static int perf_stat_init_aggr_mode(void) | |||
1236 | return 0; | 1350 | return 0; |
1237 | } | 1351 | } |
1238 | 1352 | ||
1353 | static int setup_events(const char * const *attrs, unsigned len) | ||
1354 | { | ||
1355 | unsigned i; | ||
1356 | |||
1357 | for (i = 0; i < len; i++) { | ||
1358 | if (parse_events(evsel_list, attrs[i])) | ||
1359 | return -1; | ||
1360 | } | ||
1361 | return 0; | ||
1362 | } | ||
1239 | 1363 | ||
1240 | /* | 1364 | /* |
1241 | * Add default attributes, if there were no attributes specified or | 1365 | * Add default attributes, if there were no attributes specified or |
@@ -1354,6 +1478,22 @@ static int add_default_attributes(void) | |||
1354 | if (null_run) | 1478 | if (null_run) |
1355 | return 0; | 1479 | return 0; |
1356 | 1480 | ||
1481 | if (transaction_run) { | ||
1482 | int err; | ||
1483 | if (pmu_have_event("cpu", "cycles-ct") && | ||
1484 | pmu_have_event("cpu", "el-start")) | ||
1485 | err = setup_events(transaction_attrs, | ||
1486 | ARRAY_SIZE(transaction_attrs)); | ||
1487 | else | ||
1488 | err = setup_events(transaction_limited_attrs, | ||
1489 | ARRAY_SIZE(transaction_limited_attrs)); | ||
1490 | if (err < 0) { | ||
1491 | fprintf(stderr, "Cannot set up transaction events\n"); | ||
1492 | return -1; | ||
1493 | } | ||
1494 | return 0; | ||
1495 | } | ||
1496 | |||
1357 | if (!evsel_list->nr_entries) { | 1497 | if (!evsel_list->nr_entries) { |
1358 | if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) | 1498 | if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) |
1359 | return -1; | 1499 | return -1; |
@@ -1388,6 +1528,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1388 | int output_fd = 0; | 1528 | int output_fd = 0; |
1389 | const char *output_name = NULL; | 1529 | const char *output_name = NULL; |
1390 | const struct option options[] = { | 1530 | const struct option options[] = { |
1531 | OPT_BOOLEAN('T', "transaction", &transaction_run, | ||
1532 | "hardware transaction statistics"), | ||
1391 | OPT_CALLBACK('e', "event", &evsel_list, "event", | 1533 | OPT_CALLBACK('e', "event", &evsel_list, "event", |
1392 | "event selector. use 'perf list' to list available events", | 1534 | "event selector. use 'perf list' to list available events", |
1393 | parse_events_option), | 1535 | parse_events_option), |
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4a7bdc713bab..5aa68cddc7d9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h | |||
@@ -197,6 +197,12 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1, | |||
197 | (e1->attr.config == e2->attr.config); | 197 | (e1->attr.config == e2->attr.config); |
198 | } | 198 | } |
199 | 199 | ||
/*
 * True when both evsel pointers are non-NULL and their attr.type and
 * attr.config are equal.
 *
 * Each argument is evaluated exactly once, so callers may pass function
 * calls or expressions with side effects.  Uses a GNU statement expression,
 * so it can only be used inside a function body.
 */
#define perf_evsel__cmp(a, b)						\
({									\
	__typeof__(a) evsel_cmp_a_ = (a);				\
	__typeof__(b) evsel_cmp_b_ = (b);				\
	evsel_cmp_a_ &&							\
	evsel_cmp_b_ &&							\
	evsel_cmp_a_->attr.type == evsel_cmp_b_->attr.type &&		\
	evsel_cmp_a_->attr.config == evsel_cmp_b_->attr.config;		\
})
205 | |||
200 | int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, | 206 | int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, |
201 | int cpu, int thread, bool scale); | 207 | int cpu, int thread, bool scale); |
202 | 208 | ||
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index bc9d8069d376..64362fe45b71 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c | |||
@@ -637,3 +637,19 @@ void print_pmu_events(const char *event_glob, bool name_only) | |||
637 | printf("\n"); | 637 | printf("\n"); |
638 | free(aliases); | 638 | free(aliases); |
639 | } | 639 | } |
640 | |||
641 | bool pmu_have_event(const char *pname, const char *name) | ||
642 | { | ||
643 | struct perf_pmu *pmu; | ||
644 | struct perf_pmu_alias *alias; | ||
645 | |||
646 | pmu = NULL; | ||
647 | while ((pmu = perf_pmu__scan(pmu)) != NULL) { | ||
648 | if (strcmp(pname, pmu->name)) | ||
649 | continue; | ||
650 | list_for_each_entry(alias, &pmu->aliases, list) | ||
651 | if (!strcmp(alias->name, name)) | ||
652 | return true; | ||
653 | } | ||
654 | return false; | ||
655 | } | ||
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 6b2cbe2d4cc3..1179b26f244a 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h | |||
@@ -42,6 +42,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); | |||
42 | struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); | 42 | struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); |
43 | 43 | ||
44 | void print_pmu_events(const char *event_glob, bool name_only); | 44 | void print_pmu_events(const char *event_glob, bool name_only); |
45 | bool pmu_have_event(const char *pname, const char *name); | ||
45 | 46 | ||
46 | int perf_pmu__test(void); | 47 | int perf_pmu__test(void); |
47 | #endif /* __PMU_H */ | 48 | #endif /* __PMU_H */ |