aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/perf_counter
diff options
context:
space:
mode:
author: Paul Mackerras <paulus@samba.org> 2009-03-30 13:07:10 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-04-06 03:30:41 -0400
commit: 31f004df8d14212f0a8a2fb12a8ed44a3d80e2fb (patch)
tree: 514bcf29416b141aa661b255b6da52e763ddceb5 /Documentation/perf_counter
parent: 9ea98e191255ee642e64a5745014424fc63f83b0 (diff)
perf_counter tools: optionally scale counter values in perfstat mode
Impact: new functionality

This adds an option to the perfstat mode of kerneltop to scale the reported counter values according to the fraction of time that each counter gets to count. This is invoked with the -l option (I used 'l' because s, c, a and e were all taken already.) This uses the new PERF_FORMAT_TOTAL_TIME_{ENABLED,RUNNING} read format options.

With this, we get output like this:

$ ./perfstat -l -e 0:0,0:1,0:2,0:3,0:4,0:5 ./spin

 Performance counter stats for './spin':

     4016072055 CPU cycles           (events) (scaled from 66.53%)
     2005887318 instructions         (events) (scaled from 66.53%)
        1762849 cache references     (events) (scaled from 66.69%)
         165229 cache misses         (events) (scaled from 66.85%)
     1001298009 branches             (events) (scaled from 66.78%)
          41566 branch misses        (events) (scaled from 66.61%)

 Wall-clock time elapsed:  2438.227446 msecs

This also lets us detect when a counter is zero because the counter never got to go on the CPU at all. In that case we print <not counted> rather than 0.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090330171023.871484899@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'Documentation/perf_counter')
-rw-r--r-- Documentation/perf_counter/kerneltop.c 56
1 files changed, 45 insertions, 11 deletions
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 995111dee7fb..c0ca01504ff3 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -197,6 +197,8 @@ static int delay_secs = 2;
197static int zero; 197static int zero;
198static int dump_symtab; 198static int dump_symtab;
199 199
200static int scale;
201
200struct source_line { 202struct source_line {
201 uint64_t EIP; 203 uint64_t EIP;
202 unsigned long count; 204 unsigned long count;
@@ -305,6 +307,7 @@ static void display_perfstat_help(void)
305 display_events_help(); 307 display_events_help();
306 308
307 printf( 309 printf(
310 " -l # scale counter values\n"
308 " -a # system-wide collection\n"); 311 " -a # system-wide collection\n");
309 exit(0); 312 exit(0);
310} 313}
@@ -328,6 +331,7 @@ static void display_help(void)
328 " -c CNT --count=CNT # event period to sample\n\n" 331 " -c CNT --count=CNT # event period to sample\n\n"
329 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" 332 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
330 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" 333 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
334 " -l # show scale factor for RR events\n"
331 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n" 335 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
332 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" 336 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
333 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n" 337 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
@@ -436,6 +440,9 @@ static void create_perfstat_counter(int counter)
436 hw_event.config = event_id[counter]; 440 hw_event.config = event_id[counter];
437 hw_event.record_type = PERF_RECORD_SIMPLE; 441 hw_event.record_type = PERF_RECORD_SIMPLE;
438 hw_event.nmi = 0; 442 hw_event.nmi = 0;
443 if (scale)
444 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
445 PERF_FORMAT_TOTAL_TIME_RUNNING;
439 446
440 if (system_wide) { 447 if (system_wide) {
441 int cpu; 448 int cpu;
@@ -507,28 +514,53 @@ int do_perfstat(int argc, char *argv[])
507 fprintf(stderr, "\n"); 514 fprintf(stderr, "\n");
508 515
509 for (counter = 0; counter < nr_counters; counter++) { 516 for (counter = 0; counter < nr_counters; counter++) {
510 int cpu; 517 int cpu, nv;
511 __u64 count, single_count; 518 __u64 count[3], single_count[3];
519 int scaled;
512 520
513 count = 0; 521 count[0] = count[1] = count[2] = 0;
522 nv = scale ? 3 : 1;
514 for (cpu = 0; cpu < nr_cpus; cpu ++) { 523 for (cpu = 0; cpu < nr_cpus; cpu ++) {
515 res = read(fd[cpu][counter], 524 res = read(fd[cpu][counter],
516 (char *) &single_count, sizeof(single_count)); 525 single_count, nv * sizeof(__u64));
517 assert(res == sizeof(single_count)); 526 assert(res == nv * sizeof(__u64));
518 count += single_count; 527
528 count[0] += single_count[0];
529 if (scale) {
530 count[1] += single_count[1];
531 count[2] += single_count[2];
532 }
533 }
534
535 scaled = 0;
536 if (scale) {
537 if (count[2] == 0) {
538 fprintf(stderr, " %14s %-20s\n",
539 "<not counted>", event_name(counter));
540 continue;
541 }
542 if (count[2] < count[1]) {
543 scaled = 1;
544 count[0] = (unsigned long long)
545 ((double)count[0] * count[1] / count[2] + 0.5);
546 }
519 } 547 }
520 548
521 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || 549 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
522 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { 550 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
523 551
524 double msecs = (double)count / 1000000; 552 double msecs = (double)count[0] / 1000000;
525 553
526 fprintf(stderr, " %14.6f %-20s (msecs)\n", 554 fprintf(stderr, " %14.6f %-20s (msecs)",
527 msecs, event_name(counter)); 555 msecs, event_name(counter));
528 } else { 556 } else {
529 fprintf(stderr, " %14Ld %-20s (events)\n", 557 fprintf(stderr, " %14Ld %-20s (events)",
530 count, event_name(counter)); 558 count[0], event_name(counter));
531 } 559 }
560 if (scaled)
561 fprintf(stderr, " (scaled from %.2f%%)",
562 (double) count[2] / count[1] * 100);
563 fprintf(stderr, "\n");
532 } 564 }
533 fprintf(stderr, "\n"); 565 fprintf(stderr, "\n");
534 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", 566 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
@@ -1049,6 +1081,7 @@ static void process_options(int argc, char *argv[])
1049 {"filter", required_argument, NULL, 'f'}, 1081 {"filter", required_argument, NULL, 'f'},
1050 {"group", required_argument, NULL, 'g'}, 1082 {"group", required_argument, NULL, 'g'},
1051 {"help", no_argument, NULL, 'h'}, 1083 {"help", no_argument, NULL, 'h'},
1084 {"scale", no_argument, NULL, 'l'},
1052 {"nmi", required_argument, NULL, 'n'}, 1085 {"nmi", required_argument, NULL, 'n'},
1053 {"pid", required_argument, NULL, 'p'}, 1086 {"pid", required_argument, NULL, 'p'},
1054 {"vmlinux", required_argument, NULL, 'x'}, 1087 {"vmlinux", required_argument, NULL, 'x'},
@@ -1060,7 +1093,7 @@ static void process_options(int argc, char *argv[])
1060 {"munmap_info", no_argument, NULL, 'U'}, 1093 {"munmap_info", no_argument, NULL, 'U'},
1061 {NULL, 0, NULL, 0 } 1094 {NULL, 0, NULL, 0 }
1062 }; 1095 };
1063 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:zMU", 1096 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:s:Sx:zMU",
1064 long_options, &option_index); 1097 long_options, &option_index);
1065 if (c == -1) 1098 if (c == -1)
1066 break; 1099 break;
@@ -1084,6 +1117,7 @@ static void process_options(int argc, char *argv[])
1084 case 'f': count_filter = atoi(optarg); break; 1117 case 'f': count_filter = atoi(optarg); break;
1085 case 'g': group = atoi(optarg); break; 1118 case 'g': group = atoi(optarg); break;
1086 case 'h': display_help(); break; 1119 case 'h': display_help(); break;
1120 case 'l': scale = 1; break;
1087 case 'n': nmi = atoi(optarg); break; 1121 case 'n': nmi = atoi(optarg); break;
1088 case 'p': 1122 case 'p':
1089 /* CPU and PID are mutually exclusive */ 1123 /* CPU and PID are mutually exclusive */