diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 573 |
1 files changed, 510 insertions, 63 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 03f0e45f147..a9f06715e44 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -6,24 +6,28 @@ | |||
6 | * | 6 | * |
7 | * Sample output: | 7 | * Sample output: |
8 | 8 | ||
9 | $ perf stat ~/hackbench 10 | 9 | $ perf stat ./hackbench 10 |
10 | Time: 0.104 | ||
11 | 10 | ||
12 | Performance counter stats for '/home/mingo/hackbench': | 11 | Time: 0.118 |
13 | 12 | ||
14 | 1255.538611 task clock ticks # 10.143 CPU utilization factor | 13 | Performance counter stats for './hackbench 10': |
15 | 54011 context switches # 0.043 M/sec | ||
16 | 385 CPU migrations # 0.000 M/sec | ||
17 | 17755 pagefaults # 0.014 M/sec | ||
18 | 3808323185 CPU cycles # 3033.219 M/sec | ||
19 | 1575111190 instructions # 1254.530 M/sec | ||
20 | 17367895 cache references # 13.833 M/sec | ||
21 | 7674421 cache misses # 6.112 M/sec | ||
22 | 14 | ||
23 | Wall-clock time elapsed: 123.786620 msecs | 15 | 1708.761321 task-clock # 11.037 CPUs utilized |
16 | 41,190 context-switches # 0.024 M/sec | ||
17 | 6,735 CPU-migrations # 0.004 M/sec | ||
18 | 17,318 page-faults # 0.010 M/sec | ||
19 | 5,205,202,243 cycles # 3.046 GHz | ||
20 | 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle | ||
21 | 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle | ||
22 | 2,603,501,247 instructions # 0.50 insns per cycle | ||
23 | # 1.48 stalled cycles per insn | ||
24 | 484,357,498 branches # 283.455 M/sec | ||
25 | 6,388,934 branch-misses # 1.32% of all branches | ||
26 | |||
27 | 0.154822978 seconds time elapsed | ||
24 | 28 | ||
25 | * | 29 | * |
26 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | 30 | * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> |
27 | * | 31 | * |
28 | * Improvements and fixes by: | 32 | * Improvements and fixes by: |
29 | * | 33 | * |
@@ -46,6 +50,7 @@ | |||
46 | #include "util/evlist.h" | 50 | #include "util/evlist.h" |
47 | #include "util/evsel.h" | 51 | #include "util/evsel.h" |
48 | #include "util/debug.h" | 52 | #include "util/debug.h" |
53 | #include "util/color.h" | ||
49 | #include "util/header.h" | 54 | #include "util/header.h" |
50 | #include "util/cpumap.h" | 55 | #include "util/cpumap.h" |
51 | #include "util/thread.h" | 56 | #include "util/thread.h" |
@@ -65,14 +70,107 @@ static struct perf_event_attr default_attrs[] = { | |||
65 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, | 70 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, |
66 | 71 | ||
67 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, | 72 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, |
73 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, | ||
74 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, | ||
68 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, | 75 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, |
69 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | 76 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
70 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, | 77 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, |
71 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, | ||
72 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, | ||
73 | 78 | ||
74 | }; | 79 | }; |
75 | 80 | ||
81 | /* | ||
82 | * Detailed stats (-d), covering the L1 and last level data caches: | ||
83 | */ | ||
84 | static struct perf_event_attr detailed_attrs[] = { | ||
85 | |||
86 | { .type = PERF_TYPE_HW_CACHE, | ||
87 | .config = | ||
88 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
89 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
90 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
91 | |||
92 | { .type = PERF_TYPE_HW_CACHE, | ||
93 | .config = | ||
94 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
95 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
96 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
97 | |||
98 | { .type = PERF_TYPE_HW_CACHE, | ||
99 | .config = | ||
100 | PERF_COUNT_HW_CACHE_LL << 0 | | ||
101 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
102 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
103 | |||
104 | { .type = PERF_TYPE_HW_CACHE, | ||
105 | .config = | ||
106 | PERF_COUNT_HW_CACHE_LL << 0 | | ||
107 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
108 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
109 | }; | ||
110 | |||
111 | /* | ||
112 | * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: | ||
113 | */ | ||
114 | static struct perf_event_attr very_detailed_attrs[] = { | ||
115 | |||
116 | { .type = PERF_TYPE_HW_CACHE, | ||
117 | .config = | ||
118 | PERF_COUNT_HW_CACHE_L1I << 0 | | ||
119 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
120 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
121 | |||
122 | { .type = PERF_TYPE_HW_CACHE, | ||
123 | .config = | ||
124 | PERF_COUNT_HW_CACHE_L1I << 0 | | ||
125 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
126 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
127 | |||
128 | { .type = PERF_TYPE_HW_CACHE, | ||
129 | .config = | ||
130 | PERF_COUNT_HW_CACHE_DTLB << 0 | | ||
131 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
132 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
133 | |||
134 | { .type = PERF_TYPE_HW_CACHE, | ||
135 | .config = | ||
136 | PERF_COUNT_HW_CACHE_DTLB << 0 | | ||
137 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
138 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
139 | |||
140 | { .type = PERF_TYPE_HW_CACHE, | ||
141 | .config = | ||
142 | PERF_COUNT_HW_CACHE_ITLB << 0 | | ||
143 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
144 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
145 | |||
146 | { .type = PERF_TYPE_HW_CACHE, | ||
147 | .config = | ||
148 | PERF_COUNT_HW_CACHE_ITLB << 0 | | ||
149 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
150 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
151 | |||
152 | }; | ||
153 | |||
154 | /* | ||
155 | * Very, very detailed stats (-d -d -d), adding prefetch events: | ||
156 | */ | ||
157 | static struct perf_event_attr very_very_detailed_attrs[] = { | ||
158 | |||
159 | { .type = PERF_TYPE_HW_CACHE, | ||
160 | .config = | ||
161 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
162 | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | | ||
163 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
164 | |||
165 | { .type = PERF_TYPE_HW_CACHE, | ||
166 | .config = | ||
167 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
168 | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | | ||
169 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
170 | }; | ||
171 | |||
172 | |||
173 | |||
76 | struct perf_evlist *evsel_list; | 174 | struct perf_evlist *evsel_list; |
77 | 175 | ||
78 | static bool system_wide = false; | 176 | static bool system_wide = false; |
@@ -86,6 +184,8 @@ static pid_t target_pid = -1; | |||
86 | static pid_t target_tid = -1; | 184 | static pid_t target_tid = -1; |
87 | static pid_t child_pid = -1; | 185 | static pid_t child_pid = -1; |
88 | static bool null_run = false; | 186 | static bool null_run = false; |
187 | static int detailed_run = 0; | ||
188 | static bool sync_run = false; | ||
89 | static bool big_num = true; | 189 | static bool big_num = true; |
90 | static int big_num_opt = -1; | 190 | static int big_num_opt = -1; |
91 | static const char *cpu_list; | 191 | static const char *cpu_list; |
@@ -156,7 +256,15 @@ static double stddev_stats(struct stats *stats) | |||
156 | 256 | ||
157 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | 257 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
158 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; | 258 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; |
259 | struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; | ||
260 | struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; | ||
159 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | 261 | struct stats runtime_branches_stats[MAX_NR_CPUS]; |
262 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; | ||
263 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; | ||
264 | struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; | ||
265 | struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; | ||
266 | struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; | ||
267 | struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; | ||
160 | struct stats walltime_nsecs_stats; | 268 | struct stats walltime_nsecs_stats; |
161 | 269 | ||
162 | static int create_perf_stat_counter(struct perf_evsel *evsel) | 270 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
@@ -193,6 +301,37 @@ static inline int nsec_counter(struct perf_evsel *evsel) | |||
193 | } | 301 | } |
194 | 302 | ||
195 | /* | 303 | /* |
304 | * Update various tracking values we maintain to print | ||
305 | * more semantic information such as miss/hit ratios, | ||
306 | * instruction rates, etc: | ||
307 | */ | ||
308 | static void update_shadow_stats(struct perf_evsel *counter, u64 *count) | ||
309 | { | ||
310 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | ||
311 | update_stats(&runtime_nsecs_stats[0], count[0]); | ||
312 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
313 | update_stats(&runtime_cycles_stats[0], count[0]); | ||
314 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | ||
315 | update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); | ||
316 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | ||
317 | update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); | ||
318 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
319 | update_stats(&runtime_branches_stats[0], count[0]); | ||
320 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | ||
321 | update_stats(&runtime_cacherefs_stats[0], count[0]); | ||
322 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | ||
323 | update_stats(&runtime_l1_dcache_stats[0], count[0]); | ||
324 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | ||
325 | update_stats(&runtime_l1_icache_stats[0], count[0]); | ||
326 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | ||
327 | update_stats(&runtime_ll_cache_stats[0], count[0]); | ||
328 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | ||
329 | update_stats(&runtime_dtlb_cache_stats[0], count[0]); | ||
330 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | ||
331 | update_stats(&runtime_itlb_cache_stats[0], count[0]); | ||
332 | } | ||
333 | |||
334 | /* | ||
196 | * Read out the results of a single counter: | 335 | * Read out the results of a single counter: |
197 | * aggregate counts across CPUs in system-wide mode | 336 | * aggregate counts across CPUs in system-wide mode |
198 | */ | 337 | */ |
@@ -217,12 +356,7 @@ static int read_counter_aggr(struct perf_evsel *counter) | |||
217 | /* | 356 | /* |
218 | * Save the full runtime - to allow normalization during printout: | 357 | * Save the full runtime - to allow normalization during printout: |
219 | */ | 358 | */ |
220 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | 359 | update_shadow_stats(counter, count); |
221 | update_stats(&runtime_nsecs_stats[0], count[0]); | ||
222 | if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
223 | update_stats(&runtime_cycles_stats[0], count[0]); | ||
224 | if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
225 | update_stats(&runtime_branches_stats[0], count[0]); | ||
226 | 360 | ||
227 | return 0; | 361 | return 0; |
228 | } | 362 | } |
@@ -242,12 +376,7 @@ static int read_counter(struct perf_evsel *counter) | |||
242 | 376 | ||
243 | count = counter->counts->cpu[cpu].values; | 377 | count = counter->counts->cpu[cpu].values; |
244 | 378 | ||
245 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | 379 | update_shadow_stats(counter, count); |
246 | update_stats(&runtime_nsecs_stats[cpu], count[0]); | ||
247 | if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
248 | update_stats(&runtime_cycles_stats[cpu], count[0]); | ||
249 | if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
250 | update_stats(&runtime_branches_stats[cpu], count[0]); | ||
251 | } | 380 | } |
252 | 381 | ||
253 | return 0; | 382 | return 0; |
@@ -315,13 +444,18 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
315 | 444 | ||
316 | list_for_each_entry(counter, &evsel_list->entries, node) { | 445 | list_for_each_entry(counter, &evsel_list->entries, node) { |
317 | if (create_perf_stat_counter(counter) < 0) { | 446 | if (create_perf_stat_counter(counter) < 0) { |
318 | if (errno == -EPERM || errno == -EACCES) { | 447 | if (errno == EINVAL || errno == ENOSYS || errno == ENOENT) { |
448 | if (verbose) | ||
449 | ui__warning("%s event is not supported by the kernel.\n", | ||
450 | event_name(counter)); | ||
451 | continue; | ||
452 | } | ||
453 | |||
454 | if (errno == EPERM || errno == EACCES) { | ||
319 | error("You may not have permission to collect %sstats.\n" | 455 | error("You may not have permission to collect %sstats.\n" |
320 | "\t Consider tweaking" | 456 | "\t Consider tweaking" |
321 | " /proc/sys/kernel/perf_event_paranoid or running as root.", | 457 | " /proc/sys/kernel/perf_event_paranoid or running as root.", |
322 | system_wide ? "system-wide " : ""); | 458 | system_wide ? "system-wide " : ""); |
323 | } else if (errno == ENOENT) { | ||
324 | error("%s event is not supported. ", event_name(counter)); | ||
325 | } else { | 459 | } else { |
326 | error("open_counter returned with %d (%s). " | 460 | error("open_counter returned with %d (%s). " |
327 | "/bin/dmesg may provide additional information.\n", | 461 | "/bin/dmesg may provide additional information.\n", |
@@ -372,6 +506,16 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
372 | return WEXITSTATUS(status); | 506 | return WEXITSTATUS(status); |
373 | } | 507 | } |
374 | 508 | ||
/*
 * Print a "( +- x% )" noise annotation to stderr.
 *
 * total: the spread to report (callers in this file pass a standard deviation)
 * avg:   the value the spread is relative to
 *
 * The percentage is 100 * total / avg, or 0 when avg is zero.
 */
static void print_noise_pct(double total, double avg)
{
	double pct = avg ? 100.0*total/avg : 0.0;

	fprintf(stderr, " ( +-%6.2f%% )", pct);
}
518 | |||
375 | static void print_noise(struct perf_evsel *evsel, double avg) | 519 | static void print_noise(struct perf_evsel *evsel, double avg) |
376 | { | 520 | { |
377 | struct perf_stat *ps; | 521 | struct perf_stat *ps; |
@@ -380,15 +524,14 @@ static void print_noise(struct perf_evsel *evsel, double avg) | |||
380 | return; | 524 | return; |
381 | 525 | ||
382 | ps = evsel->priv; | 526 | ps = evsel->priv; |
383 | fprintf(stderr, " ( +- %7.3f%% )", | 527 | print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); |
384 | 100 * stddev_stats(&ps->res_stats[0]) / avg); | ||
385 | } | 528 | } |
386 | 529 | ||
387 | static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) | 530 | static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) |
388 | { | 531 | { |
389 | double msecs = avg / 1e6; | 532 | double msecs = avg / 1e6; |
390 | char cpustr[16] = { '\0', }; | 533 | char cpustr[16] = { '\0', }; |
391 | const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s"; | 534 | const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; |
392 | 535 | ||
393 | if (no_aggr) | 536 | if (no_aggr) |
394 | sprintf(cpustr, "CPU%*d%s", | 537 | sprintf(cpustr, "CPU%*d%s", |
@@ -404,8 +547,191 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
404 | return; | 547 | return; |
405 | 548 | ||
406 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) | 549 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
407 | fprintf(stderr, " # %10.3f CPUs ", | 550 | fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats)); |
408 | avg / avg_stats(&walltime_nsecs_stats)); | 551 | } |
552 | |||
553 | static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg) | ||
554 | { | ||
555 | double total, ratio = 0.0; | ||
556 | const char *color; | ||
557 | |||
558 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
559 | |||
560 | if (total) | ||
561 | ratio = avg / total * 100.0; | ||
562 | |||
563 | color = PERF_COLOR_NORMAL; | ||
564 | if (ratio > 50.0) | ||
565 | color = PERF_COLOR_RED; | ||
566 | else if (ratio > 30.0) | ||
567 | color = PERF_COLOR_MAGENTA; | ||
568 | else if (ratio > 10.0) | ||
569 | color = PERF_COLOR_YELLOW; | ||
570 | |||
571 | fprintf(stderr, " # "); | ||
572 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
573 | fprintf(stderr, " frontend cycles idle "); | ||
574 | } | ||
575 | |||
576 | static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg) | ||
577 | { | ||
578 | double total, ratio = 0.0; | ||
579 | const char *color; | ||
580 | |||
581 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
582 | |||
583 | if (total) | ||
584 | ratio = avg / total * 100.0; | ||
585 | |||
586 | color = PERF_COLOR_NORMAL; | ||
587 | if (ratio > 75.0) | ||
588 | color = PERF_COLOR_RED; | ||
589 | else if (ratio > 50.0) | ||
590 | color = PERF_COLOR_MAGENTA; | ||
591 | else if (ratio > 20.0) | ||
592 | color = PERF_COLOR_YELLOW; | ||
593 | |||
594 | fprintf(stderr, " # "); | ||
595 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
596 | fprintf(stderr, " backend cycles idle "); | ||
597 | } | ||
598 | |||
599 | static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
600 | { | ||
601 | double total, ratio = 0.0; | ||
602 | const char *color; | ||
603 | |||
604 | total = avg_stats(&runtime_branches_stats[cpu]); | ||
605 | |||
606 | if (total) | ||
607 | ratio = avg / total * 100.0; | ||
608 | |||
609 | color = PERF_COLOR_NORMAL; | ||
610 | if (ratio > 20.0) | ||
611 | color = PERF_COLOR_RED; | ||
612 | else if (ratio > 10.0) | ||
613 | color = PERF_COLOR_MAGENTA; | ||
614 | else if (ratio > 5.0) | ||
615 | color = PERF_COLOR_YELLOW; | ||
616 | |||
617 | fprintf(stderr, " # "); | ||
618 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
619 | fprintf(stderr, " of all branches "); | ||
620 | } | ||
621 | |||
622 | static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
623 | { | ||
624 | double total, ratio = 0.0; | ||
625 | const char *color; | ||
626 | |||
627 | total = avg_stats(&runtime_l1_dcache_stats[cpu]); | ||
628 | |||
629 | if (total) | ||
630 | ratio = avg / total * 100.0; | ||
631 | |||
632 | color = PERF_COLOR_NORMAL; | ||
633 | if (ratio > 20.0) | ||
634 | color = PERF_COLOR_RED; | ||
635 | else if (ratio > 10.0) | ||
636 | color = PERF_COLOR_MAGENTA; | ||
637 | else if (ratio > 5.0) | ||
638 | color = PERF_COLOR_YELLOW; | ||
639 | |||
640 | fprintf(stderr, " # "); | ||
641 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
642 | fprintf(stderr, " of all L1-dcache hits "); | ||
643 | } | ||
644 | |||
645 | static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
646 | { | ||
647 | double total, ratio = 0.0; | ||
648 | const char *color; | ||
649 | |||
650 | total = avg_stats(&runtime_l1_icache_stats[cpu]); | ||
651 | |||
652 | if (total) | ||
653 | ratio = avg / total * 100.0; | ||
654 | |||
655 | color = PERF_COLOR_NORMAL; | ||
656 | if (ratio > 20.0) | ||
657 | color = PERF_COLOR_RED; | ||
658 | else if (ratio > 10.0) | ||
659 | color = PERF_COLOR_MAGENTA; | ||
660 | else if (ratio > 5.0) | ||
661 | color = PERF_COLOR_YELLOW; | ||
662 | |||
663 | fprintf(stderr, " # "); | ||
664 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
665 | fprintf(stderr, " of all L1-icache hits "); | ||
666 | } | ||
667 | |||
668 | static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
669 | { | ||
670 | double total, ratio = 0.0; | ||
671 | const char *color; | ||
672 | |||
673 | total = avg_stats(&runtime_dtlb_cache_stats[cpu]); | ||
674 | |||
675 | if (total) | ||
676 | ratio = avg / total * 100.0; | ||
677 | |||
678 | color = PERF_COLOR_NORMAL; | ||
679 | if (ratio > 20.0) | ||
680 | color = PERF_COLOR_RED; | ||
681 | else if (ratio > 10.0) | ||
682 | color = PERF_COLOR_MAGENTA; | ||
683 | else if (ratio > 5.0) | ||
684 | color = PERF_COLOR_YELLOW; | ||
685 | |||
686 | fprintf(stderr, " # "); | ||
687 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
688 | fprintf(stderr, " of all dTLB cache hits "); | ||
689 | } | ||
690 | |||
691 | static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
692 | { | ||
693 | double total, ratio = 0.0; | ||
694 | const char *color; | ||
695 | |||
696 | total = avg_stats(&runtime_itlb_cache_stats[cpu]); | ||
697 | |||
698 | if (total) | ||
699 | ratio = avg / total * 100.0; | ||
700 | |||
701 | color = PERF_COLOR_NORMAL; | ||
702 | if (ratio > 20.0) | ||
703 | color = PERF_COLOR_RED; | ||
704 | else if (ratio > 10.0) | ||
705 | color = PERF_COLOR_MAGENTA; | ||
706 | else if (ratio > 5.0) | ||
707 | color = PERF_COLOR_YELLOW; | ||
708 | |||
709 | fprintf(stderr, " # "); | ||
710 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
711 | fprintf(stderr, " of all iTLB cache hits "); | ||
712 | } | ||
713 | |||
714 | static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
715 | { | ||
716 | double total, ratio = 0.0; | ||
717 | const char *color; | ||
718 | |||
719 | total = avg_stats(&runtime_ll_cache_stats[cpu]); | ||
720 | |||
721 | if (total) | ||
722 | ratio = avg / total * 100.0; | ||
723 | |||
724 | color = PERF_COLOR_NORMAL; | ||
725 | if (ratio > 20.0) | ||
726 | color = PERF_COLOR_RED; | ||
727 | else if (ratio > 10.0) | ||
728 | color = PERF_COLOR_MAGENTA; | ||
729 | else if (ratio > 5.0) | ||
730 | color = PERF_COLOR_YELLOW; | ||
731 | |||
732 | fprintf(stderr, " # "); | ||
733 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
734 | fprintf(stderr, " of all LL-cache hits "); | ||
409 | } | 735 | } |
410 | 736 | ||
411 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | 737 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) |
@@ -417,9 +743,9 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
417 | if (csv_output) | 743 | if (csv_output) |
418 | fmt = "%s%.0f%s%s"; | 744 | fmt = "%s%.0f%s%s"; |
419 | else if (big_num) | 745 | else if (big_num) |
420 | fmt = "%s%'18.0f%s%-24s"; | 746 | fmt = "%s%'18.0f%s%-25s"; |
421 | else | 747 | else |
422 | fmt = "%s%18.0f%s%-24s"; | 748 | fmt = "%s%18.0f%s%-25s"; |
423 | 749 | ||
424 | if (no_aggr) | 750 | if (no_aggr) |
425 | sprintf(cpustr, "CPU%*d%s", | 751 | sprintf(cpustr, "CPU%*d%s", |
@@ -442,23 +768,83 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
442 | if (total) | 768 | if (total) |
443 | ratio = avg / total; | 769 | ratio = avg / total; |
444 | 770 | ||
445 | fprintf(stderr, " # %10.3f IPC ", ratio); | 771 | fprintf(stderr, " # %5.2f insns per cycle ", ratio); |
772 | |||
773 | total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); | ||
774 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); | ||
775 | |||
776 | if (total && avg) { | ||
777 | ratio = total / avg; | ||
778 | fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); | ||
779 | } | ||
780 | |||
446 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && | 781 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && |
447 | runtime_branches_stats[cpu].n != 0) { | 782 | runtime_branches_stats[cpu].n != 0) { |
448 | total = avg_stats(&runtime_branches_stats[cpu]); | 783 | print_branch_misses(cpu, evsel, avg); |
784 | } else if ( | ||
785 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
786 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | ||
787 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
788 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
789 | runtime_l1_dcache_stats[cpu].n != 0) { | ||
790 | print_l1_dcache_misses(cpu, evsel, avg); | ||
791 | } else if ( | ||
792 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
793 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | ||
794 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
795 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
796 | runtime_l1_icache_stats[cpu].n != 0) { | ||
797 | print_l1_icache_misses(cpu, evsel, avg); | ||
798 | } else if ( | ||
799 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
800 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | ||
801 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
802 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
803 | runtime_dtlb_cache_stats[cpu].n != 0) { | ||
804 | print_dtlb_cache_misses(cpu, evsel, avg); | ||
805 | } else if ( | ||
806 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
807 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | ||
808 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
809 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
810 | runtime_itlb_cache_stats[cpu].n != 0) { | ||
811 | print_itlb_cache_misses(cpu, evsel, avg); | ||
812 | } else if ( | ||
813 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
814 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | ||
815 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
816 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
817 | runtime_ll_cache_stats[cpu].n != 0) { | ||
818 | print_ll_cache_misses(cpu, evsel, avg); | ||
819 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | ||
820 | runtime_cacherefs_stats[cpu].n != 0) { | ||
821 | total = avg_stats(&runtime_cacherefs_stats[cpu]); | ||
449 | 822 | ||
450 | if (total) | 823 | if (total) |
451 | ratio = avg * 100 / total; | 824 | ratio = avg * 100 / total; |
452 | 825 | ||
453 | fprintf(stderr, " # %10.3f %% ", ratio); | 826 | fprintf(stderr, " # %8.3f %% of all cache refs ", ratio); |
827 | |||
828 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { | ||
829 | print_stalled_cycles_frontend(cpu, evsel, avg); | ||
830 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { | ||
831 | print_stalled_cycles_backend(cpu, evsel, avg); | ||
832 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { | ||
833 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
454 | 834 | ||
835 | if (total) | ||
836 | ratio = 1.0 * avg / total; | ||
837 | |||
838 | fprintf(stderr, " # %8.3f GHz ", ratio); | ||
455 | } else if (runtime_nsecs_stats[cpu].n != 0) { | 839 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
456 | total = avg_stats(&runtime_nsecs_stats[cpu]); | 840 | total = avg_stats(&runtime_nsecs_stats[cpu]); |
457 | 841 | ||
458 | if (total) | 842 | if (total) |
459 | ratio = 1000.0 * avg / total; | 843 | ratio = 1000.0 * avg / total; |
460 | 844 | ||
461 | fprintf(stderr, " # %10.3f M/sec", ratio); | 845 | fprintf(stderr, " # %8.3f M/sec ", ratio); |
846 | } else { | ||
847 | fprintf(stderr, " "); | ||
462 | } | 848 | } |
463 | } | 849 | } |
464 | 850 | ||
@@ -505,8 +891,7 @@ static void print_counter_aggr(struct perf_evsel *counter) | |||
505 | avg_enabled = avg_stats(&ps->res_stats[1]); | 891 | avg_enabled = avg_stats(&ps->res_stats[1]); |
506 | avg_running = avg_stats(&ps->res_stats[2]); | 892 | avg_running = avg_stats(&ps->res_stats[2]); |
507 | 893 | ||
508 | fprintf(stderr, " (scaled from %.2f%%)", | 894 | fprintf(stderr, " [%5.2f%%]", 100 * avg_running / avg_enabled); |
509 | 100 * avg_running / avg_enabled); | ||
510 | } | 895 | } |
511 | fprintf(stderr, "\n"); | 896 | fprintf(stderr, "\n"); |
512 | } | 897 | } |
@@ -548,10 +933,8 @@ static void print_counter(struct perf_evsel *counter) | |||
548 | if (!csv_output) { | 933 | if (!csv_output) { |
549 | print_noise(counter, 1.0); | 934 | print_noise(counter, 1.0); |
550 | 935 | ||
551 | if (run != ena) { | 936 | if (run != ena) |
552 | fprintf(stderr, " (scaled from %.2f%%)", | 937 | fprintf(stderr, " (%.2f%%)", 100.0 * run / ena); |
553 | 100.0 * run / ena); | ||
554 | } | ||
555 | } | 938 | } |
556 | fputc('\n', stderr); | 939 | fputc('\n', stderr); |
557 | } | 940 | } |
@@ -591,13 +974,14 @@ static void print_stat(int argc, const char **argv) | |||
591 | } | 974 | } |
592 | 975 | ||
593 | if (!csv_output) { | 976 | if (!csv_output) { |
594 | fprintf(stderr, "\n"); | 977 | if (!null_run) |
595 | fprintf(stderr, " %18.9f seconds time elapsed", | 978 | fprintf(stderr, "\n"); |
979 | fprintf(stderr, " %17.9f seconds time elapsed", | ||
596 | avg_stats(&walltime_nsecs_stats)/1e9); | 980 | avg_stats(&walltime_nsecs_stats)/1e9); |
597 | if (run_count > 1) { | 981 | if (run_count > 1) { |
598 | fprintf(stderr, " ( +- %7.3f%% )", | 982 | fprintf(stderr, " "); |
599 | 100*stddev_stats(&walltime_nsecs_stats) / | 983 | print_noise_pct(stddev_stats(&walltime_nsecs_stats), |
600 | avg_stats(&walltime_nsecs_stats)); | 984 | avg_stats(&walltime_nsecs_stats)); |
601 | } | 985 | } |
602 | fprintf(stderr, "\n\n"); | 986 | fprintf(stderr, "\n\n"); |
603 | } | 987 | } |
@@ -659,6 +1043,10 @@ static const struct option options[] = { | |||
659 | "repeat command and print average + stddev (max: 100)"), | 1043 | "repeat command and print average + stddev (max: 100)"), |
660 | OPT_BOOLEAN('n', "null", &null_run, | 1044 | OPT_BOOLEAN('n', "null", &null_run, |
661 | "null run - dont start any counters"), | 1045 | "null run - dont start any counters"), |
1046 | OPT_INCR('d', "detailed", &detailed_run, | ||
1047 | "detailed run - start a lot of events"), | ||
1048 | OPT_BOOLEAN('S', "sync", &sync_run, | ||
1049 | "call sync() before starting a run"), | ||
662 | OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, | 1050 | OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, |
663 | "print large numbers with thousands\' separators", | 1051 | "print large numbers with thousands\' separators", |
664 | stat__set_big_num), | 1052 | stat__set_big_num), |
@@ -674,6 +1062,70 @@ static const struct option options[] = { | |||
674 | OPT_END() | 1062 | OPT_END() |
675 | }; | 1063 | }; |
676 | 1064 | ||
1065 | /* | ||
1066 | * Add default attributes, if there were no attributes specified or | ||
1067 | * if -d/--detailed, -d -d or -d -d -d is used: | ||
1068 | */ | ||
1069 | static int add_default_attributes(void) | ||
1070 | { | ||
1071 | struct perf_evsel *pos; | ||
1072 | size_t attr_nr = 0; | ||
1073 | size_t c; | ||
1074 | |||
1075 | /* Set attrs if no event is selected and !null_run: */ | ||
1076 | if (null_run) | ||
1077 | return 0; | ||
1078 | |||
1079 | if (!evsel_list->nr_entries) { | ||
1080 | for (c = 0; c < ARRAY_SIZE(default_attrs); c++) { | ||
1081 | pos = perf_evsel__new(default_attrs + c, c + attr_nr); | ||
1082 | if (pos == NULL) | ||
1083 | return -1; | ||
1084 | perf_evlist__add(evsel_list, pos); | ||
1085 | } | ||
1086 | attr_nr += c; | ||
1087 | } | ||
1088 | |||
1089 | /* Detailed events get appended to the event list: */ | ||
1090 | |||
1091 | if (detailed_run < 1) | ||
1092 | return 0; | ||
1093 | |||
1094 | /* Append detailed run extra attributes: */ | ||
1095 | for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) { | ||
1096 | pos = perf_evsel__new(detailed_attrs + c, c + attr_nr); | ||
1097 | if (pos == NULL) | ||
1098 | return -1; | ||
1099 | perf_evlist__add(evsel_list, pos); | ||
1100 | } | ||
1101 | attr_nr += c; | ||
1102 | |||
1103 | if (detailed_run < 2) | ||
1104 | return 0; | ||
1105 | |||
1106 | /* Append very detailed run extra attributes: */ | ||
1107 | for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) { | ||
1108 | pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr); | ||
1109 | if (pos == NULL) | ||
1110 | return -1; | ||
1111 | perf_evlist__add(evsel_list, pos); | ||
1112 | } | ||
1113 | |||
1114 | if (detailed_run < 3) | ||
1115 | return 0; | ||
1116 | |||
1117 | /* Append very, very detailed run extra attributes: */ | ||
1118 | for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) { | ||
1119 | pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr); | ||
1120 | if (pos == NULL) | ||
1121 | return -1; | ||
1122 | perf_evlist__add(evsel_list, pos); | ||
1123 | } | ||
1124 | |||
1125 | |||
1126 | return 0; | ||
1127 | } | ||
1128 | |||
677 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 1129 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
678 | { | 1130 | { |
679 | struct perf_evsel *pos; | 1131 | struct perf_evsel *pos; |
@@ -719,17 +1171,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
719 | usage_with_options(stat_usage, options); | 1171 | usage_with_options(stat_usage, options); |
720 | } | 1172 | } |
721 | 1173 | ||
722 | /* Set attrs and nr_counters if no event is selected and !null_run */ | 1174 | if (add_default_attributes()) |
723 | if (!null_run && !evsel_list->nr_entries) { | 1175 | goto out; |
724 | size_t c; | ||
725 | |||
726 | for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { | ||
727 | pos = perf_evsel__new(&default_attrs[c], c); | ||
728 | if (pos == NULL) | ||
729 | goto out; | ||
730 | perf_evlist__add(evsel_list, pos); | ||
731 | } | ||
732 | } | ||
733 | 1176 | ||
734 | if (target_pid != -1) | 1177 | if (target_pid != -1) |
735 | target_tid = target_pid; | 1178 | target_tid = target_pid; |
@@ -773,6 +1216,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
773 | for (run_idx = 0; run_idx < run_count; run_idx++) { | 1216 | for (run_idx = 0; run_idx < run_count; run_idx++) { |
774 | if (run_count != 1 && verbose) | 1217 | if (run_count != 1 && verbose) |
775 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); | 1218 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); |
1219 | |||
1220 | if (sync_run) | ||
1221 | sync(); | ||
1222 | |||
776 | status = run_perf_stat(argc, argv); | 1223 | status = run_perf_stat(argc, argv); |
777 | } | 1224 | } |
778 | 1225 | ||