diff options
author | Paul Mackerras <paulus@samba.org> | 2009-03-30 13:07:10 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-06 03:30:41 -0400 |
commit | 31f004df8d14212f0a8a2fb12a8ed44a3d80e2fb (patch) | |
tree | 514bcf29416b141aa661b255b6da52e763ddceb5 /Documentation/perf_counter | |
parent | 9ea98e191255ee642e64a5745014424fc63f83b0 (diff) |
perf_counter tools: optionally scale counter values in perfstat mode
Impact: new functionality
This adds an option to the perfstat mode of kerneltop to scale the
reported counter values according to the fraction of time that each
counter gets to count. This is invoked with the -l option (I used 'l'
because s, c, a and e were all taken already). This uses the new
PERF_FORMAT_TOTAL_TIME_{ENABLED,RUNNING} read format options.
With this, we get output like this:
$ ./perfstat -l -e 0:0,0:1,0:2,0:3,0:4,0:5 ./spin
Performance counter stats for './spin':
4016072055 CPU cycles (events) (scaled from 66.53%)
2005887318 instructions (events) (scaled from 66.53%)
1762849 cache references (events) (scaled from 66.69%)
165229 cache misses (events) (scaled from 66.85%)
1001298009 branches (events) (scaled from 66.78%)
41566 branch misses (events) (scaled from 66.61%)
Wall-clock time elapsed: 2438.227446 msecs
This also lets us detect when a counter is zero because the counter
never got to go on the CPU at all. In that case we print <not counted>
rather than 0.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090330171023.871484899@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'Documentation/perf_counter')
-rw-r--r-- | Documentation/perf_counter/kerneltop.c | 56 |
1 files changed, 45 insertions, 11 deletions
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 995111dee7fb..c0ca01504ff3 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c | |||
@@ -197,6 +197,8 @@ static int delay_secs = 2; | |||
197 | static int zero; | 197 | static int zero; |
198 | static int dump_symtab; | 198 | static int dump_symtab; |
199 | 199 | ||
200 | static int scale; | ||
201 | |||
200 | struct source_line { | 202 | struct source_line { |
201 | uint64_t EIP; | 203 | uint64_t EIP; |
202 | unsigned long count; | 204 | unsigned long count; |
@@ -305,6 +307,7 @@ static void display_perfstat_help(void) | |||
305 | display_events_help(); | 307 | display_events_help(); |
306 | 308 | ||
307 | printf( | 309 | printf( |
310 | " -l # scale counter values\n" | ||
308 | " -a # system-wide collection\n"); | 311 | " -a # system-wide collection\n"); |
309 | exit(0); | 312 | exit(0); |
310 | } | 313 | } |
@@ -328,6 +331,7 @@ static void display_help(void) | |||
328 | " -c CNT --count=CNT # event period to sample\n\n" | 331 | " -c CNT --count=CNT # event period to sample\n\n" |
329 | " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" | 332 | " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" |
330 | " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" | 333 | " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" |
334 | " -l # show scale factor for RR events\n" | ||
331 | " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n" | 335 | " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n" |
332 | " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" | 336 | " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" |
333 | " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n" | 337 | " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n" |
@@ -436,6 +440,9 @@ static void create_perfstat_counter(int counter) | |||
436 | hw_event.config = event_id[counter]; | 440 | hw_event.config = event_id[counter]; |
437 | hw_event.record_type = PERF_RECORD_SIMPLE; | 441 | hw_event.record_type = PERF_RECORD_SIMPLE; |
438 | hw_event.nmi = 0; | 442 | hw_event.nmi = 0; |
443 | if (scale) | ||
444 | hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | ||
445 | PERF_FORMAT_TOTAL_TIME_RUNNING; | ||
439 | 446 | ||
440 | if (system_wide) { | 447 | if (system_wide) { |
441 | int cpu; | 448 | int cpu; |
@@ -507,28 +514,53 @@ int do_perfstat(int argc, char *argv[]) | |||
507 | fprintf(stderr, "\n"); | 514 | fprintf(stderr, "\n"); |
508 | 515 | ||
509 | for (counter = 0; counter < nr_counters; counter++) { | 516 | for (counter = 0; counter < nr_counters; counter++) { |
510 | int cpu; | 517 | int cpu, nv; |
511 | __u64 count, single_count; | 518 | __u64 count[3], single_count[3]; |
519 | int scaled; | ||
512 | 520 | ||
513 | count = 0; | 521 | count[0] = count[1] = count[2] = 0; |
522 | nv = scale ? 3 : 1; | ||
514 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | 523 | for (cpu = 0; cpu < nr_cpus; cpu ++) { |
515 | res = read(fd[cpu][counter], | 524 | res = read(fd[cpu][counter], |
516 | (char *) &single_count, sizeof(single_count)); | 525 | single_count, nv * sizeof(__u64)); |
517 | assert(res == sizeof(single_count)); | 526 | assert(res == nv * sizeof(__u64)); |
518 | count += single_count; | 527 | |
528 | count[0] += single_count[0]; | ||
529 | if (scale) { | ||
530 | count[1] += single_count[1]; | ||
531 | count[2] += single_count[2]; | ||
532 | } | ||
533 | } | ||
534 | |||
535 | scaled = 0; | ||
536 | if (scale) { | ||
537 | if (count[2] == 0) { | ||
538 | fprintf(stderr, " %14s %-20s\n", | ||
539 | "<not counted>", event_name(counter)); | ||
540 | continue; | ||
541 | } | ||
542 | if (count[2] < count[1]) { | ||
543 | scaled = 1; | ||
544 | count[0] = (unsigned long long) | ||
545 | ((double)count[0] * count[1] / count[2] + 0.5); | ||
546 | } | ||
519 | } | 547 | } |
520 | 548 | ||
521 | if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || | 549 | if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || |
522 | event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { | 550 | event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { |
523 | 551 | ||
524 | double msecs = (double)count / 1000000; | 552 | double msecs = (double)count[0] / 1000000; |
525 | 553 | ||
526 | fprintf(stderr, " %14.6f %-20s (msecs)\n", | 554 | fprintf(stderr, " %14.6f %-20s (msecs)", |
527 | msecs, event_name(counter)); | 555 | msecs, event_name(counter)); |
528 | } else { | 556 | } else { |
529 | fprintf(stderr, " %14Ld %-20s (events)\n", | 557 | fprintf(stderr, " %14Ld %-20s (events)", |
530 | count, event_name(counter)); | 558 | count[0], event_name(counter)); |
531 | } | 559 | } |
560 | if (scaled) | ||
561 | fprintf(stderr, " (scaled from %.2f%%)", | ||
562 | (double) count[2] / count[1] * 100); | ||
563 | fprintf(stderr, "\n"); | ||
532 | } | 564 | } |
533 | fprintf(stderr, "\n"); | 565 | fprintf(stderr, "\n"); |
534 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | 566 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", |
@@ -1049,6 +1081,7 @@ static void process_options(int argc, char *argv[]) | |||
1049 | {"filter", required_argument, NULL, 'f'}, | 1081 | {"filter", required_argument, NULL, 'f'}, |
1050 | {"group", required_argument, NULL, 'g'}, | 1082 | {"group", required_argument, NULL, 'g'}, |
1051 | {"help", no_argument, NULL, 'h'}, | 1083 | {"help", no_argument, NULL, 'h'}, |
1084 | {"scale", no_argument, NULL, 'l'}, | ||
1052 | {"nmi", required_argument, NULL, 'n'}, | 1085 | {"nmi", required_argument, NULL, 'n'}, |
1053 | {"pid", required_argument, NULL, 'p'}, | 1086 | {"pid", required_argument, NULL, 'p'}, |
1054 | {"vmlinux", required_argument, NULL, 'x'}, | 1087 | {"vmlinux", required_argument, NULL, 'x'}, |
@@ -1060,7 +1093,7 @@ static void process_options(int argc, char *argv[]) | |||
1060 | {"munmap_info", no_argument, NULL, 'U'}, | 1093 | {"munmap_info", no_argument, NULL, 'U'}, |
1061 | {NULL, 0, NULL, 0 } | 1094 | {NULL, 0, NULL, 0 } |
1062 | }; | 1095 | }; |
1063 | int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:zMU", | 1096 | int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:s:Sx:zMU", |
1064 | long_options, &option_index); | 1097 | long_options, &option_index); |
1065 | if (c == -1) | 1098 | if (c == -1) |
1066 | break; | 1099 | break; |
@@ -1084,6 +1117,7 @@ static void process_options(int argc, char *argv[]) | |||
1084 | case 'f': count_filter = atoi(optarg); break; | 1117 | case 'f': count_filter = atoi(optarg); break; |
1085 | case 'g': group = atoi(optarg); break; | 1118 | case 'g': group = atoi(optarg); break; |
1086 | case 'h': display_help(); break; | 1119 | case 'h': display_help(); break; |
1120 | case 'l': scale = 1; break; | ||
1087 | case 'n': nmi = atoi(optarg); break; | 1121 | case 'n': nmi = atoi(optarg); break; |
1088 | case 'p': | 1122 | case 'p': |
1089 | /* CPU and PID are mutually exclusive */ | 1123 | /* CPU and PID are mutually exclusive */ |