diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-05-06 15:07:33 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-05-06 15:07:38 -0400 |
commit | 57d524154ffe99d27fb55e0e30ddbad9f4c35806 (patch) | |
tree | 406ce0543fa294d41ab8204ca531ab18453e5a1a /tools | |
parent | e04d1b23f9706186187dcb0be1a752e48dcc540b (diff) | |
parent | c63ca0c01d73563d4e2ab174bb3dd1e5efb907e6 (diff) |
Merge branch 'perf/stat' into perf/core
Merge reason: the perf stat improvements are tested and ready now.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/builtin-stat.c | 284 | ||||
-rw-r--r-- | tools/perf/util/parse-events.c | 120 | ||||
-rw-r--r-- | tools/perf/util/python.c | 3 |
3 files changed, 321 insertions, 86 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 03f0e45f1479..602c3c96fa1e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "util/evlist.h" | 46 | #include "util/evlist.h" |
47 | #include "util/evsel.h" | 47 | #include "util/evsel.h" |
48 | #include "util/debug.h" | 48 | #include "util/debug.h" |
49 | #include "util/color.h" | ||
49 | #include "util/header.h" | 50 | #include "util/header.h" |
50 | #include "util/cpumap.h" | 51 | #include "util/cpumap.h" |
51 | #include "util/thread.h" | 52 | #include "util/thread.h" |
@@ -65,14 +66,56 @@ static struct perf_event_attr default_attrs[] = { | |||
65 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, | 66 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, |
66 | 67 | ||
67 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, | 68 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, |
69 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, | ||
70 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, | ||
68 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, | 71 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, |
69 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | 72 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
70 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, | 73 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, |
71 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, | ||
72 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, | ||
73 | 74 | ||
74 | }; | 75 | }; |
75 | 76 | ||
77 | /* | ||
78 | * Detailed stats: | ||
79 | */ | ||
80 | static struct perf_event_attr detailed_attrs[] = { | ||
81 | |||
82 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | ||
83 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, | ||
84 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, | ||
85 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, | ||
86 | |||
87 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, | ||
88 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, | ||
89 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, | ||
90 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, | ||
91 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | ||
92 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, | ||
93 | |||
94 | { .type = PERF_TYPE_HW_CACHE, | ||
95 | .config = | ||
96 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
97 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
98 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
99 | |||
100 | { .type = PERF_TYPE_HW_CACHE, | ||
101 | .config = | ||
102 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
103 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
104 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
105 | |||
106 | { .type = PERF_TYPE_HW_CACHE, | ||
107 | .config = | ||
108 | PERF_COUNT_HW_CACHE_LL << 0 | | ||
109 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
110 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
111 | |||
112 | { .type = PERF_TYPE_HW_CACHE, | ||
113 | .config = | ||
114 | PERF_COUNT_HW_CACHE_LL << 0 | | ||
115 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
116 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
117 | }; | ||
118 | |||
76 | struct perf_evlist *evsel_list; | 119 | struct perf_evlist *evsel_list; |
77 | 120 | ||
78 | static bool system_wide = false; | 121 | static bool system_wide = false; |
@@ -86,6 +129,8 @@ static pid_t target_pid = -1; | |||
86 | static pid_t target_tid = -1; | 129 | static pid_t target_tid = -1; |
87 | static pid_t child_pid = -1; | 130 | static pid_t child_pid = -1; |
88 | static bool null_run = false; | 131 | static bool null_run = false; |
132 | static bool detailed_run = false; | ||
133 | static bool sync_run = false; | ||
89 | static bool big_num = true; | 134 | static bool big_num = true; |
90 | static int big_num_opt = -1; | 135 | static int big_num_opt = -1; |
91 | static const char *cpu_list; | 136 | static const char *cpu_list; |
@@ -156,7 +201,11 @@ static double stddev_stats(struct stats *stats) | |||
156 | 201 | ||
157 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | 202 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
158 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; | 203 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; |
204 | struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; | ||
205 | struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; | ||
159 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | 206 | struct stats runtime_branches_stats[MAX_NR_CPUS]; |
207 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; | ||
208 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; | ||
160 | struct stats walltime_nsecs_stats; | 209 | struct stats walltime_nsecs_stats; |
161 | 210 | ||
162 | static int create_perf_stat_counter(struct perf_evsel *evsel) | 211 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
@@ -193,6 +242,29 @@ static inline int nsec_counter(struct perf_evsel *evsel) | |||
193 | } | 242 | } |
194 | 243 | ||
195 | /* | 244 | /* |
245 | * Update various tracking values we maintain to print | ||
246 | * more semantic information such as miss/hit ratios, | ||
247 | * instruction rates, etc: | ||
248 | */ | ||
249 | static void update_shadow_stats(struct perf_evsel *counter, u64 *count) | ||
250 | { | ||
251 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | ||
252 | update_stats(&runtime_nsecs_stats[0], count[0]); | ||
253 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
254 | update_stats(&runtime_cycles_stats[0], count[0]); | ||
255 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | ||
256 | update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); | ||
257 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | ||
258 | update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); | ||
259 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
260 | update_stats(&runtime_branches_stats[0], count[0]); | ||
261 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | ||
262 | update_stats(&runtime_cacherefs_stats[0], count[0]); | ||
263 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | ||
264 | update_stats(&runtime_l1_dcache_stats[0], count[0]); | ||
265 | } | ||
266 | |||
267 | /* | ||
196 | * Read out the results of a single counter: | 268 | * Read out the results of a single counter: |
197 | * aggregate counts across CPUs in system-wide mode | 269 | * aggregate counts across CPUs in system-wide mode |
198 | */ | 270 | */ |
@@ -217,12 +289,7 @@ static int read_counter_aggr(struct perf_evsel *counter) | |||
217 | /* | 289 | /* |
218 | * Save the full runtime - to allow normalization during printout: | 290 | * Save the full runtime - to allow normalization during printout: |
219 | */ | 291 | */ |
220 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | 292 | update_shadow_stats(counter, count); |
221 | update_stats(&runtime_nsecs_stats[0], count[0]); | ||
222 | if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
223 | update_stats(&runtime_cycles_stats[0], count[0]); | ||
224 | if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
225 | update_stats(&runtime_branches_stats[0], count[0]); | ||
226 | 293 | ||
227 | return 0; | 294 | return 0; |
228 | } | 295 | } |
@@ -242,12 +309,7 @@ static int read_counter(struct perf_evsel *counter) | |||
242 | 309 | ||
243 | count = counter->counts->cpu[cpu].values; | 310 | count = counter->counts->cpu[cpu].values; |
244 | 311 | ||
245 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | 312 | update_shadow_stats(counter, count); |
246 | update_stats(&runtime_nsecs_stats[cpu], count[0]); | ||
247 | if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
248 | update_stats(&runtime_cycles_stats[cpu], count[0]); | ||
249 | if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
250 | update_stats(&runtime_branches_stats[cpu], count[0]); | ||
251 | } | 313 | } |
252 | 314 | ||
253 | return 0; | 315 | return 0; |
@@ -315,13 +377,18 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
315 | 377 | ||
316 | list_for_each_entry(counter, &evsel_list->entries, node) { | 378 | list_for_each_entry(counter, &evsel_list->entries, node) { |
317 | if (create_perf_stat_counter(counter) < 0) { | 379 | if (create_perf_stat_counter(counter) < 0) { |
318 | if (errno == -EPERM || errno == -EACCES) { | 380 | if (errno == EINVAL || errno == ENOSYS || errno == ENOENT) { |
381 | if (verbose) | ||
382 | ui__warning("%s event is not supported by the kernel.\n", | ||
383 | event_name(counter)); | ||
384 | continue; | ||
385 | } | ||
386 | |||
387 | if (errno == EPERM || errno == EACCES) { | ||
319 | error("You may not have permission to collect %sstats.\n" | 388 | error("You may not have permission to collect %sstats.\n" |
320 | "\t Consider tweaking" | 389 | "\t Consider tweaking" |
321 | " /proc/sys/kernel/perf_event_paranoid or running as root.", | 390 | " /proc/sys/kernel/perf_event_paranoid or running as root.", |
322 | system_wide ? "system-wide " : ""); | 391 | system_wide ? "system-wide " : ""); |
323 | } else if (errno == ENOENT) { | ||
324 | error("%s event is not supported. ", event_name(counter)); | ||
325 | } else { | 392 | } else { |
326 | error("open_counter returned with %d (%s). " | 393 | error("open_counter returned with %d (%s). " |
327 | "/bin/dmesg may provide additional information.\n", | 394 | "/bin/dmesg may provide additional information.\n", |
@@ -372,6 +439,16 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
372 | return WEXITSTATUS(status); | 439 | return WEXITSTATUS(status); |
373 | } | 440 | } |
374 | 441 | ||
442 | static void print_noise_pct(double total, double avg) | ||
443 | { | ||
444 | double pct = 0.0; | ||
445 | |||
446 | if (avg) | ||
447 | pct = 100.0*total/avg; | ||
448 | |||
449 | fprintf(stderr, " ( +-%6.2f%% )", pct); | ||
450 | } | ||
451 | |||
375 | static void print_noise(struct perf_evsel *evsel, double avg) | 452 | static void print_noise(struct perf_evsel *evsel, double avg) |
376 | { | 453 | { |
377 | struct perf_stat *ps; | 454 | struct perf_stat *ps; |
@@ -380,8 +457,7 @@ static void print_noise(struct perf_evsel *evsel, double avg) | |||
380 | return; | 457 | return; |
381 | 458 | ||
382 | ps = evsel->priv; | 459 | ps = evsel->priv; |
383 | fprintf(stderr, " ( +- %7.3f%% )", | 460 | print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); |
384 | 100 * stddev_stats(&ps->res_stats[0]) / avg); | ||
385 | } | 461 | } |
386 | 462 | ||
387 | static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) | 463 | static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) |
@@ -404,8 +480,99 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
404 | return; | 480 | return; |
405 | 481 | ||
406 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) | 482 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
407 | fprintf(stderr, " # %10.3f CPUs ", | 483 | fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats)); |
408 | avg / avg_stats(&walltime_nsecs_stats)); | 484 | } |
485 | |||
486 | static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg) | ||
487 | { | ||
488 | double total, ratio = 0.0; | ||
489 | const char *color; | ||
490 | |||
491 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
492 | |||
493 | if (total) | ||
494 | ratio = avg / total * 100.0; | ||
495 | |||
496 | color = PERF_COLOR_NORMAL; | ||
497 | if (ratio > 50.0) | ||
498 | color = PERF_COLOR_RED; | ||
499 | else if (ratio > 30.0) | ||
500 | color = PERF_COLOR_MAGENTA; | ||
501 | else if (ratio > 10.0) | ||
502 | color = PERF_COLOR_YELLOW; | ||
503 | |||
504 | fprintf(stderr, " # "); | ||
505 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
506 | fprintf(stderr, " frontend cycles idle "); | ||
507 | } | ||
508 | |||
509 | static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg) | ||
510 | { | ||
511 | double total, ratio = 0.0; | ||
512 | const char *color; | ||
513 | |||
514 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
515 | |||
516 | if (total) | ||
517 | ratio = avg / total * 100.0; | ||
518 | |||
519 | color = PERF_COLOR_NORMAL; | ||
520 | if (ratio > 75.0) | ||
521 | color = PERF_COLOR_RED; | ||
522 | else if (ratio > 50.0) | ||
523 | color = PERF_COLOR_MAGENTA; | ||
524 | else if (ratio > 20.0) | ||
525 | color = PERF_COLOR_YELLOW; | ||
526 | |||
527 | fprintf(stderr, " # "); | ||
528 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
529 | fprintf(stderr, " backend cycles idle "); | ||
530 | } | ||
531 | |||
532 | static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
533 | { | ||
534 | double total, ratio = 0.0; | ||
535 | const char *color; | ||
536 | |||
537 | total = avg_stats(&runtime_branches_stats[cpu]); | ||
538 | |||
539 | if (total) | ||
540 | ratio = avg / total * 100.0; | ||
541 | |||
542 | color = PERF_COLOR_NORMAL; | ||
543 | if (ratio > 20.0) | ||
544 | color = PERF_COLOR_RED; | ||
545 | else if (ratio > 10.0) | ||
546 | color = PERF_COLOR_MAGENTA; | ||
547 | else if (ratio > 5.0) | ||
548 | color = PERF_COLOR_YELLOW; | ||
549 | |||
550 | fprintf(stderr, " # "); | ||
551 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
552 | fprintf(stderr, " of all branches "); | ||
553 | } | ||
554 | |||
555 | static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
556 | { | ||
557 | double total, ratio = 0.0; | ||
558 | const char *color; | ||
559 | |||
560 | total = avg_stats(&runtime_l1_dcache_stats[cpu]); | ||
561 | |||
562 | if (total) | ||
563 | ratio = avg / total * 100.0; | ||
564 | |||
565 | color = PERF_COLOR_NORMAL; | ||
566 | if (ratio > 20.0) | ||
567 | color = PERF_COLOR_RED; | ||
568 | else if (ratio > 10.0) | ||
569 | color = PERF_COLOR_MAGENTA; | ||
570 | else if (ratio > 5.0) | ||
571 | color = PERF_COLOR_YELLOW; | ||
572 | |||
573 | fprintf(stderr, " # "); | ||
574 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
575 | fprintf(stderr, " of all L1-dcache hits "); | ||
409 | } | 576 | } |
410 | 577 | ||
411 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | 578 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) |
@@ -442,23 +609,55 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
442 | if (total) | 609 | if (total) |
443 | ratio = avg / total; | 610 | ratio = avg / total; |
444 | 611 | ||
445 | fprintf(stderr, " # %10.3f IPC ", ratio); | 612 | fprintf(stderr, " # %5.2f insns per cycle ", ratio); |
613 | |||
614 | total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); | ||
615 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); | ||
616 | |||
617 | if (total && avg) { | ||
618 | ratio = total / avg; | ||
619 | fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); | ||
620 | } | ||
621 | |||
446 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && | 622 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && |
447 | runtime_branches_stats[cpu].n != 0) { | 623 | runtime_branches_stats[cpu].n != 0) { |
448 | total = avg_stats(&runtime_branches_stats[cpu]); | 624 | print_branch_misses(cpu, evsel, avg); |
625 | } else if ( | ||
626 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
627 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | ||
628 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
629 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
630 | runtime_l1_dcache_stats[cpu].n != 0) { | ||
631 | print_l1_dcache_misses(cpu, evsel, avg); | ||
632 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | ||
633 | runtime_cacherefs_stats[cpu].n != 0) { | ||
634 | total = avg_stats(&runtime_cacherefs_stats[cpu]); | ||
449 | 635 | ||
450 | if (total) | 636 | if (total) |
451 | ratio = avg * 100 / total; | 637 | ratio = avg * 100 / total; |
452 | 638 | ||
453 | fprintf(stderr, " # %10.3f %% ", ratio); | 639 | fprintf(stderr, " # %8.3f %% of all cache refs ", ratio); |
454 | 640 | ||
641 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { | ||
642 | print_stalled_cycles_frontend(cpu, evsel, avg); | ||
643 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { | ||
644 | print_stalled_cycles_backend(cpu, evsel, avg); | ||
645 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { | ||
646 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
647 | |||
648 | if (total) | ||
649 | ratio = 1.0 * avg / total; | ||
650 | |||
651 | fprintf(stderr, " # %8.3f GHz ", ratio); | ||
455 | } else if (runtime_nsecs_stats[cpu].n != 0) { | 652 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
456 | total = avg_stats(&runtime_nsecs_stats[cpu]); | 653 | total = avg_stats(&runtime_nsecs_stats[cpu]); |
457 | 654 | ||
458 | if (total) | 655 | if (total) |
459 | ratio = 1000.0 * avg / total; | 656 | ratio = 1000.0 * avg / total; |
460 | 657 | ||
461 | fprintf(stderr, " # %10.3f M/sec", ratio); | 658 | fprintf(stderr, " # %8.3f M/sec ", ratio); |
659 | } else { | ||
660 | fprintf(stderr, " "); | ||
462 | } | 661 | } |
463 | } | 662 | } |
464 | 663 | ||
@@ -505,8 +704,7 @@ static void print_counter_aggr(struct perf_evsel *counter) | |||
505 | avg_enabled = avg_stats(&ps->res_stats[1]); | 704 | avg_enabled = avg_stats(&ps->res_stats[1]); |
506 | avg_running = avg_stats(&ps->res_stats[2]); | 705 | avg_running = avg_stats(&ps->res_stats[2]); |
507 | 706 | ||
508 | fprintf(stderr, " (scaled from %.2f%%)", | 707 | fprintf(stderr, " (%.2f%%)", 100 * avg_running / avg_enabled); |
509 | 100 * avg_running / avg_enabled); | ||
510 | } | 708 | } |
511 | fprintf(stderr, "\n"); | 709 | fprintf(stderr, "\n"); |
512 | } | 710 | } |
@@ -548,10 +746,8 @@ static void print_counter(struct perf_evsel *counter) | |||
548 | if (!csv_output) { | 746 | if (!csv_output) { |
549 | print_noise(counter, 1.0); | 747 | print_noise(counter, 1.0); |
550 | 748 | ||
551 | if (run != ena) { | 749 | if (run != ena) |
552 | fprintf(stderr, " (scaled from %.2f%%)", | 750 | fprintf(stderr, " (%.2f%%)", 100.0 * run / ena); |
553 | 100.0 * run / ena); | ||
554 | } | ||
555 | } | 751 | } |
556 | fputc('\n', stderr); | 752 | fputc('\n', stderr); |
557 | } | 753 | } |
@@ -595,9 +791,8 @@ static void print_stat(int argc, const char **argv) | |||
595 | fprintf(stderr, " %18.9f seconds time elapsed", | 791 | fprintf(stderr, " %18.9f seconds time elapsed", |
596 | avg_stats(&walltime_nsecs_stats)/1e9); | 792 | avg_stats(&walltime_nsecs_stats)/1e9); |
597 | if (run_count > 1) { | 793 | if (run_count > 1) { |
598 | fprintf(stderr, " ( +- %7.3f%% )", | 794 | print_noise_pct(stddev_stats(&walltime_nsecs_stats), |
599 | 100*stddev_stats(&walltime_nsecs_stats) / | 795 | avg_stats(&walltime_nsecs_stats)); |
600 | avg_stats(&walltime_nsecs_stats)); | ||
601 | } | 796 | } |
602 | fprintf(stderr, "\n\n"); | 797 | fprintf(stderr, "\n\n"); |
603 | } | 798 | } |
@@ -659,6 +854,10 @@ static const struct option options[] = { | |||
659 | "repeat command and print average + stddev (max: 100)"), | 854 | "repeat command and print average + stddev (max: 100)"), |
660 | OPT_BOOLEAN('n', "null", &null_run, | 855 | OPT_BOOLEAN('n', "null", &null_run, |
661 | "null run - dont start any counters"), | 856 | "null run - dont start any counters"), |
857 | OPT_BOOLEAN('d', "detailed", &detailed_run, | ||
858 | "detailed run - start a lot of events"), | ||
859 | OPT_BOOLEAN('S', "sync", &sync_run, | ||
860 | "call sync() before starting a run"), | ||
662 | OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, | 861 | OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, |
663 | "print large numbers with thousands\' separators", | 862 | "print large numbers with thousands\' separators", |
664 | stat__set_big_num), | 863 | stat__set_big_num), |
@@ -720,7 +919,18 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
720 | } | 919 | } |
721 | 920 | ||
722 | /* Set attrs and nr_counters if no event is selected and !null_run */ | 921 | /* Set attrs and nr_counters if no event is selected and !null_run */ |
723 | if (!null_run && !evsel_list->nr_entries) { | 922 | if (detailed_run) { |
923 | size_t c; | ||
924 | |||
925 | for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) { | ||
926 | pos = perf_evsel__new(&detailed_attrs[c], c); | ||
927 | if (pos == NULL) | ||
928 | goto out; | ||
929 | perf_evlist__add(evsel_list, pos); | ||
930 | } | ||
931 | } | ||
932 | /* Set attrs and nr_counters if no event is selected and !null_run */ | ||
933 | if (!detailed_run && !null_run && !evsel_list->nr_entries) { | ||
724 | size_t c; | 934 | size_t c; |
725 | 935 | ||
726 | for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { | 936 | for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { |
@@ -773,6 +983,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
773 | for (run_idx = 0; run_idx < run_count; run_idx++) { | 983 | for (run_idx = 0; run_idx < run_count; run_idx++) { |
774 | if (run_count != 1 && verbose) | 984 | if (run_count != 1 && verbose) |
775 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); | 985 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); |
986 | |||
987 | if (sync_run) | ||
988 | sync(); | ||
989 | |||
776 | status = run_perf_stat(argc, argv); | 990 | status = run_perf_stat(argc, argv); |
777 | } | 991 | } |
778 | 992 | ||
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 952b4ae3d954..ffa493a24333 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -31,34 +31,36 @@ char debugfs_path[MAXPATHLEN]; | |||
31 | #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x | 31 | #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x |
32 | 32 | ||
33 | static struct event_symbol event_symbols[] = { | 33 | static struct event_symbol event_symbols[] = { |
34 | { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, | 34 | { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, |
35 | { CHW(INSTRUCTIONS), "instructions", "" }, | 35 | { CHW(STALLED_CYCLES_FRONTEND), "stalled-cycles-frontend", "idle-cycles-frontend" }, |
36 | { CHW(CACHE_REFERENCES), "cache-references", "" }, | 36 | { CHW(STALLED_CYCLES_BACKEND), "stalled-cycles-backend", "idle-cycles-backend" }, |
37 | { CHW(CACHE_MISSES), "cache-misses", "" }, | 37 | { CHW(INSTRUCTIONS), "instructions", "" }, |
38 | { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, | 38 | { CHW(CACHE_REFERENCES), "cache-references", "" }, |
39 | { CHW(BRANCH_MISSES), "branch-misses", "" }, | 39 | { CHW(CACHE_MISSES), "cache-misses", "" }, |
40 | { CHW(BUS_CYCLES), "bus-cycles", "" }, | 40 | { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, |
41 | 41 | { CHW(BRANCH_MISSES), "branch-misses", "" }, | |
42 | { CSW(CPU_CLOCK), "cpu-clock", "" }, | 42 | { CHW(BUS_CYCLES), "bus-cycles", "" }, |
43 | { CSW(TASK_CLOCK), "task-clock", "" }, | 43 | |
44 | { CSW(PAGE_FAULTS), "page-faults", "faults" }, | 44 | { CSW(CPU_CLOCK), "cpu-clock", "" }, |
45 | { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, | 45 | { CSW(TASK_CLOCK), "task-clock", "" }, |
46 | { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, | 46 | { CSW(PAGE_FAULTS), "page-faults", "faults" }, |
47 | { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, | 47 | { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, |
48 | { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, | 48 | { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, |
49 | { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, | 49 | { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, |
50 | { CSW(EMULATION_FAULTS), "emulation-faults", "" }, | 50 | { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, |
51 | { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, | ||
52 | { CSW(EMULATION_FAULTS), "emulation-faults", "" }, | ||
51 | }; | 53 | }; |
52 | 54 | ||
53 | #define __PERF_EVENT_FIELD(config, name) \ | 55 | #define __PERF_EVENT_FIELD(config, name) \ |
54 | ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) | 56 | ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) |
55 | 57 | ||
56 | #define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) | 58 | #define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) |
57 | #define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG) | 59 | #define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG) |
58 | #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) | 60 | #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) |
59 | #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) | 61 | #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) |
60 | 62 | ||
61 | static const char *hw_event_names[] = { | 63 | static const char *hw_event_names[PERF_COUNT_HW_MAX] = { |
62 | "cycles", | 64 | "cycles", |
63 | "instructions", | 65 | "instructions", |
64 | "cache-references", | 66 | "cache-references", |
@@ -66,11 +68,13 @@ static const char *hw_event_names[] = { | |||
66 | "branches", | 68 | "branches", |
67 | "branch-misses", | 69 | "branch-misses", |
68 | "bus-cycles", | 70 | "bus-cycles", |
71 | "stalled-cycles-frontend", | ||
72 | "stalled-cycles-backend", | ||
69 | }; | 73 | }; |
70 | 74 | ||
71 | static const char *sw_event_names[] = { | 75 | static const char *sw_event_names[PERF_COUNT_SW_MAX] = { |
72 | "cpu-clock-msecs", | 76 | "cpu-clock", |
73 | "task-clock-msecs", | 77 | "task-clock", |
74 | "page-faults", | 78 | "page-faults", |
75 | "context-switches", | 79 | "context-switches", |
76 | "CPU-migrations", | 80 | "CPU-migrations", |
@@ -307,7 +311,7 @@ const char *__event_name(int type, u64 config) | |||
307 | 311 | ||
308 | switch (type) { | 312 | switch (type) { |
309 | case PERF_TYPE_HARDWARE: | 313 | case PERF_TYPE_HARDWARE: |
310 | if (config < PERF_COUNT_HW_MAX) | 314 | if (config < PERF_COUNT_HW_MAX && hw_event_names[config]) |
311 | return hw_event_names[config]; | 315 | return hw_event_names[config]; |
312 | return "unknown-hardware"; | 316 | return "unknown-hardware"; |
313 | 317 | ||
@@ -333,7 +337,7 @@ const char *__event_name(int type, u64 config) | |||
333 | } | 337 | } |
334 | 338 | ||
335 | case PERF_TYPE_SOFTWARE: | 339 | case PERF_TYPE_SOFTWARE: |
336 | if (config < PERF_COUNT_SW_MAX) | 340 | if (config < PERF_COUNT_SW_MAX && sw_event_names[config]) |
337 | return sw_event_names[config]; | 341 | return sw_event_names[config]; |
338 | return "unknown-software"; | 342 | return "unknown-software"; |
339 | 343 | ||
@@ -648,13 +652,15 @@ static int check_events(const char *str, unsigned int i) | |||
648 | int n; | 652 | int n; |
649 | 653 | ||
650 | n = strlen(event_symbols[i].symbol); | 654 | n = strlen(event_symbols[i].symbol); |
651 | if (!strncmp(str, event_symbols[i].symbol, n)) | 655 | if (!strncasecmp(str, event_symbols[i].symbol, n)) |
652 | return n; | 656 | return n; |
653 | 657 | ||
654 | n = strlen(event_symbols[i].alias); | 658 | n = strlen(event_symbols[i].alias); |
655 | if (n) | 659 | if (n) { |
656 | if (!strncmp(str, event_symbols[i].alias, n)) | 660 | if (!strncasecmp(str, event_symbols[i].alias, n)) |
657 | return n; | 661 | return n; |
662 | } | ||
663 | |||
658 | return 0; | 664 | return 0; |
659 | } | 665 | } |
660 | 666 | ||
@@ -718,15 +724,19 @@ parse_numeric_event(const char **strp, struct perf_event_attr *attr) | |||
718 | return EVT_FAILED; | 724 | return EVT_FAILED; |
719 | } | 725 | } |
720 | 726 | ||
721 | static enum event_result | 727 | static int |
722 | parse_event_modifier(const char **strp, struct perf_event_attr *attr) | 728 | parse_event_modifier(const char **strp, struct perf_event_attr *attr) |
723 | { | 729 | { |
724 | const char *str = *strp; | 730 | const char *str = *strp; |
725 | int exclude = 0; | 731 | int exclude = 0; |
726 | int eu = 0, ek = 0, eh = 0, precise = 0; | 732 | int eu = 0, ek = 0, eh = 0, precise = 0; |
727 | 733 | ||
728 | if (*str++ != ':') | 734 | if (!*str) |
729 | return 0; | 735 | return 0; |
736 | |||
737 | if (*str++ != ':') | ||
738 | return -1; | ||
739 | |||
730 | while (*str) { | 740 | while (*str) { |
731 | if (*str == 'u') { | 741 | if (*str == 'u') { |
732 | if (!exclude) | 742 | if (!exclude) |
@@ -747,14 +757,16 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr) | |||
747 | 757 | ||
748 | ++str; | 758 | ++str; |
749 | } | 759 | } |
750 | if (str >= *strp + 2) { | 760 | if (str < *strp + 2) |
751 | *strp = str; | 761 | return -1; |
752 | attr->exclude_user = eu; | 762 | |
753 | attr->exclude_kernel = ek; | 763 | *strp = str; |
754 | attr->exclude_hv = eh; | 764 | |
755 | attr->precise_ip = precise; | 765 | attr->exclude_user = eu; |
756 | return 1; | 766 | attr->exclude_kernel = ek; |
757 | } | 767 | attr->exclude_hv = eh; |
768 | attr->precise_ip = precise; | ||
769 | |||
758 | return 0; | 770 | return 0; |
759 | } | 771 | } |
760 | 772 | ||
@@ -797,7 +809,12 @@ parse_event_symbols(const struct option *opt, const char **str, | |||
797 | return EVT_FAILED; | 809 | return EVT_FAILED; |
798 | 810 | ||
799 | modifier: | 811 | modifier: |
800 | parse_event_modifier(str, attr); | 812 | if (parse_event_modifier(str, attr) < 0) { |
813 | fprintf(stderr, "invalid event modifier: '%s'\n", *str); | ||
814 | fprintf(stderr, "Run 'perf list' for a list of valid events and modifiers\n"); | ||
815 | |||
816 | return EVT_FAILED; | ||
817 | } | ||
801 | 818 | ||
802 | return ret; | 819 | return ret; |
803 | } | 820 | } |
@@ -912,7 +929,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob) | |||
912 | 929 | ||
913 | snprintf(evt_path, MAXPATHLEN, "%s:%s", | 930 | snprintf(evt_path, MAXPATHLEN, "%s:%s", |
914 | sys_dirent.d_name, evt_dirent.d_name); | 931 | sys_dirent.d_name, evt_dirent.d_name); |
915 | printf(" %-42s [%s]\n", evt_path, | 932 | printf(" %-50s [%s]\n", evt_path, |
916 | event_type_descriptors[PERF_TYPE_TRACEPOINT]); | 933 | event_type_descriptors[PERF_TYPE_TRACEPOINT]); |
917 | } | 934 | } |
918 | closedir(evt_dir); | 935 | closedir(evt_dir); |
@@ -977,7 +994,7 @@ void print_events_type(u8 type) | |||
977 | else | 994 | else |
978 | snprintf(name, sizeof(name), "%s", syms->symbol); | 995 | snprintf(name, sizeof(name), "%s", syms->symbol); |
979 | 996 | ||
980 | printf(" %-42s [%s]\n", name, | 997 | printf(" %-50s [%s]\n", name, |
981 | event_type_descriptors[type]); | 998 | event_type_descriptors[type]); |
982 | } | 999 | } |
983 | } | 1000 | } |
@@ -995,11 +1012,10 @@ int print_hwcache_events(const char *event_glob) | |||
995 | for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { | 1012 | for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { |
996 | char *name = event_cache_name(type, op, i); | 1013 | char *name = event_cache_name(type, op, i); |
997 | 1014 | ||
998 | if (event_glob != NULL && | 1015 | if (event_glob != NULL && !strglobmatch(name, event_glob)) |
999 | !strglobmatch(name, event_glob)) | ||
1000 | continue; | 1016 | continue; |
1001 | 1017 | ||
1002 | printf(" %-42s [%s]\n", name, | 1018 | printf(" %-50s [%s]\n", name, |
1003 | event_type_descriptors[PERF_TYPE_HW_CACHE]); | 1019 | event_type_descriptors[PERF_TYPE_HW_CACHE]); |
1004 | ++printed; | 1020 | ++printed; |
1005 | } | 1021 | } |
@@ -1009,14 +1025,16 @@ int print_hwcache_events(const char *event_glob) | |||
1009 | return printed; | 1025 | return printed; |
1010 | } | 1026 | } |
1011 | 1027 | ||
1028 | #define MAX_NAME_LEN 100 | ||
1029 | |||
1012 | /* | 1030 | /* |
1013 | * Print the help text for the event symbols: | 1031 | * Print the help text for the event symbols: |
1014 | */ | 1032 | */ |
1015 | void print_events(const char *event_glob) | 1033 | void print_events(const char *event_glob) |
1016 | { | 1034 | { |
1017 | struct event_symbol *syms = event_symbols; | ||
1018 | unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0; | 1035 | unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0; |
1019 | char name[40]; | 1036 | struct event_symbol *syms = event_symbols; |
1037 | char name[MAX_NAME_LEN]; | ||
1020 | 1038 | ||
1021 | printf("\n"); | 1039 | printf("\n"); |
1022 | printf("List of pre-defined events (to be used in -e):\n"); | 1040 | printf("List of pre-defined events (to be used in -e):\n"); |
@@ -1036,10 +1054,10 @@ void print_events(const char *event_glob) | |||
1036 | continue; | 1054 | continue; |
1037 | 1055 | ||
1038 | if (strlen(syms->alias)) | 1056 | if (strlen(syms->alias)) |
1039 | sprintf(name, "%s OR %s", syms->symbol, syms->alias); | 1057 | snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); |
1040 | else | 1058 | else |
1041 | strcpy(name, syms->symbol); | 1059 | strncpy(name, syms->symbol, MAX_NAME_LEN); |
1042 | printf(" %-42s [%s]\n", name, | 1060 | printf(" %-50s [%s]\n", name, |
1043 | event_type_descriptors[type]); | 1061 | event_type_descriptors[type]); |
1044 | 1062 | ||
1045 | prev_type = type; | 1063 | prev_type = type; |
@@ -1056,12 +1074,12 @@ void print_events(const char *event_glob) | |||
1056 | return; | 1074 | return; |
1057 | 1075 | ||
1058 | printf("\n"); | 1076 | printf("\n"); |
1059 | printf(" %-42s [%s]\n", | 1077 | printf(" %-50s [%s]\n", |
1060 | "rNNN (see 'perf list --help' on how to encode it)", | 1078 | "rNNN (see 'perf list --help' on how to encode it)", |
1061 | event_type_descriptors[PERF_TYPE_RAW]); | 1079 | event_type_descriptors[PERF_TYPE_RAW]); |
1062 | printf("\n"); | 1080 | printf("\n"); |
1063 | 1081 | ||
1064 | printf(" %-42s [%s]\n", | 1082 | printf(" %-50s [%s]\n", |
1065 | "mem:<addr>[:access]", | 1083 | "mem:<addr>[:access]", |
1066 | event_type_descriptors[PERF_TYPE_BREAKPOINT]); | 1084 | event_type_descriptors[PERF_TYPE_BREAKPOINT]); |
1067 | printf("\n"); | 1085 | printf("\n"); |
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index f5e38451fdc5..8b0eff8b8283 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c | |||
@@ -810,6 +810,9 @@ static struct { | |||
810 | { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS }, | 810 | { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS }, |
811 | { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS }, | 811 | { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS }, |
812 | 812 | ||
813 | { "COUNT_HW_STALLED_CYCLES_FRONTEND", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, | ||
814 | { "COUNT_HW_STALLED_CYCLES_BACKEND", PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, | ||
815 | |||
813 | { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK }, | 816 | { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK }, |
814 | { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK }, | 817 | { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK }, |
815 | { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS }, | 818 | { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS }, |