aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorStephane Eranian <eranian@google.com>2013-01-29 06:47:44 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2013-01-30 08:36:53 -0500
commit13370a9b5bb88f7aa90e5be68972d95096b20a6d (patch)
tree78f29e88ca8e01bc14865fd1ecf83f994ee46776 /tools
parentc7a79c47c683de6979a3e1a96dc723b0606c07ca (diff)
perf stat: Add interval printing
This patch adds a new printing mode for perf stat. It allows interval printing. That means perf stat can now print event deltas at regular time interval. This is useful to detect phases in programs. The -I option enables interval printing. It expects an interval duration in milliseconds. Minimum is 100ms. Once, activated perf stat prints events deltas since last printout. All modes are supported. $ perf stat -I 1000 -e cycles noploop 10 noploop for 10 seconds # time counts events 1.000109853 2,388,560,546 cycles 2.000262846 2,393,332,358 cycles 3.000354131 2,393,176,537 cycles 4.000439503 2,393,203,790 cycles 5.000527075 2,393,167,675 cycles 6.000609052 2,393,203,670 cycles 7.000691082 2,393,175,678 cycles The output format makes it easy to feed into a plotting program such as gnuplot when the -I option is used in combination with the -x option: $ perf stat -x, -I 1000 -e cycles noploop 10 noploop for 10 seconds 1.000084113,2378775498,cycles 2.000245798,2391056897,cycles 3.000354445,2392089414,cycles 4.000459115,2390936603,cycles 5.000565341,2392108173,cycles Signed-off-by: Stephane Eranian <eranian@google.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung.kim@lge.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1359460064-3060-3-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Documentation/perf-stat.txt4
-rw-r--r--tools/perf/builtin-stat.c157
2 files changed, 146 insertions, 15 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index cf0c3107e06e..5289da3344e9 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -114,6 +114,10 @@ with it. --append may be used here. Examples:
114 114
115perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage 115perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
116 116
117-I msecs::
118--interval-print msecs::
119 print count deltas every N milliseconds (minimum: 100ms)
120 example: perf stat -I 1000 -e cycles -a sleep 5
117 121
118EXAMPLES 122EXAMPLES
119-------- 123--------
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1c2ac148a7d5..493043abd164 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,10 @@
65#define CNTR_NOT_SUPPORTED "<not supported>" 65#define CNTR_NOT_SUPPORTED "<not supported>"
66#define CNTR_NOT_COUNTED "<not counted>" 66#define CNTR_NOT_COUNTED "<not counted>"
67 67
68static void print_stat(int argc, const char **argv);
69static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70static void print_counter(struct perf_evsel *counter, char *prefix);
71
68static struct perf_evlist *evsel_list; 72static struct perf_evlist *evsel_list;
69 73
70static struct perf_target target = { 74static struct perf_target target = {
@@ -87,6 +91,8 @@ static FILE *output = NULL;
87static const char *pre_cmd = NULL; 91static const char *pre_cmd = NULL;
88static const char *post_cmd = NULL; 92static const char *post_cmd = NULL;
89static bool sync_run = false; 93static bool sync_run = false;
94static unsigned int interval = 0;
95static struct timespec ref_time;
90 96
91static volatile int done = 0; 97static volatile int done = 0;
92 98
@@ -94,6 +100,28 @@ struct perf_stat {
94 struct stats res_stats[3]; 100 struct stats res_stats[3];
95}; 101};
96 102
103static inline void diff_timespec(struct timespec *r, struct timespec *a,
104 struct timespec *b)
105{
106 r->tv_sec = a->tv_sec - b->tv_sec;
107 if (a->tv_nsec < b->tv_nsec) {
108 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
109 r->tv_sec--;
110 } else {
111 r->tv_nsec = a->tv_nsec - b->tv_nsec ;
112 }
113}
114
115static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
116{
117 return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
118}
119
120static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
121{
122 return perf_evsel__cpus(evsel)->nr;
123}
124
97static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) 125static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
98{ 126{
99 evsel->priv = zalloc(sizeof(struct perf_stat)); 127 evsel->priv = zalloc(sizeof(struct perf_stat));
@@ -106,14 +134,27 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
106 evsel->priv = NULL; 134 evsel->priv = NULL;
107} 135}
108 136
109static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) 137static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
110{ 138{
111 return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; 139 void *addr;
140 size_t sz;
141
142 sz = sizeof(*evsel->counts) +
143 (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
144
145 addr = zalloc(sz);
146 if (!addr)
147 return -ENOMEM;
148
149 evsel->prev_raw_counts = addr;
150
151 return 0;
112} 152}
113 153
114static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) 154static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
115{ 155{
116 return perf_evsel__cpus(evsel)->nr; 156 free(evsel->prev_raw_counts);
157 evsel->prev_raw_counts = NULL;
117} 158}
118 159
119static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 160static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
@@ -245,16 +286,69 @@ static int read_counter(struct perf_evsel *counter)
245 return 0; 286 return 0;
246} 287}
247 288
289static void print_interval(void)
290{
291 static int num_print_interval;
292 struct perf_evsel *counter;
293 struct perf_stat *ps;
294 struct timespec ts, rs;
295 char prefix[64];
296
297 if (no_aggr) {
298 list_for_each_entry(counter, &evsel_list->entries, node) {
299 ps = counter->priv;
300 memset(ps->res_stats, 0, sizeof(ps->res_stats));
301 read_counter(counter);
302 }
303 } else {
304 list_for_each_entry(counter, &evsel_list->entries, node) {
305 ps = counter->priv;
306 memset(ps->res_stats, 0, sizeof(ps->res_stats));
307 read_counter_aggr(counter);
308 }
309 }
310 clock_gettime(CLOCK_MONOTONIC, &ts);
311 diff_timespec(&rs, &ts, &ref_time);
312 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
313
314 if (num_print_interval == 0 && !csv_output) {
315 if (no_aggr)
316 fprintf(output, "# time CPU counts events\n");
317 else
318 fprintf(output, "# time counts events\n");
319 }
320
321 if (++num_print_interval == 25)
322 num_print_interval = 0;
323
324 if (no_aggr) {
325 list_for_each_entry(counter, &evsel_list->entries, node)
326 print_counter(counter, prefix);
327 } else {
328 list_for_each_entry(counter, &evsel_list->entries, node)
329 print_counter_aggr(counter, prefix);
330 }
331}
332
248static int __run_perf_stat(int argc __maybe_unused, const char **argv) 333static int __run_perf_stat(int argc __maybe_unused, const char **argv)
249{ 334{
250 char msg[512]; 335 char msg[512];
251 unsigned long long t0, t1; 336 unsigned long long t0, t1;
252 struct perf_evsel *counter; 337 struct perf_evsel *counter;
338 struct timespec ts;
253 int status = 0; 339 int status = 0;
254 int child_ready_pipe[2], go_pipe[2]; 340 int child_ready_pipe[2], go_pipe[2];
255 const bool forks = (argc > 0); 341 const bool forks = (argc > 0);
256 char buf; 342 char buf;
257 343
344 if (interval) {
345 ts.tv_sec = interval / 1000;
346 ts.tv_nsec = (interval % 1000) * 1000000;
347 } else {
348 ts.tv_sec = 1;
349 ts.tv_nsec = 0;
350 }
351
258 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { 352 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
259 perror("failed to create pipes"); 353 perror("failed to create pipes");
260 return -1; 354 return -1;
@@ -347,14 +441,25 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
347 * Enable counters and exec the command: 441 * Enable counters and exec the command:
348 */ 442 */
349 t0 = rdclock(); 443 t0 = rdclock();
444 clock_gettime(CLOCK_MONOTONIC, &ref_time);
350 445
351 if (forks) { 446 if (forks) {
352 close(go_pipe[1]); 447 close(go_pipe[1]);
448 if (interval) {
449 while (!waitpid(child_pid, &status, WNOHANG)) {
450 nanosleep(&ts, NULL);
451 print_interval();
452 }
453 }
353 wait(&status); 454 wait(&status);
354 if (WIFSIGNALED(status)) 455 if (WIFSIGNALED(status))
355 psignal(WTERMSIG(status), argv[0]); 456 psignal(WTERMSIG(status), argv[0]);
356 } else { 457 } else {
357 while(!done) sleep(1); 458 while (!done) {
459 nanosleep(&ts, NULL);
460 if (interval)
461 print_interval();
462 }
358 } 463 }
359 464
360 t1 = rdclock(); 465 t1 = rdclock();
@@ -440,7 +545,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
440 if (evsel->cgrp) 545 if (evsel->cgrp)
441 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 546 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
442 547
443 if (csv_output) 548 if (csv_output || interval)
444 return; 549 return;
445 550
446 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 551 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -654,12 +759,11 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
654 if (evsel->cgrp) 759 if (evsel->cgrp)
655 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 760 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
656 761
657 if (csv_output) 762 if (csv_output || interval)
658 return; 763 return;
659 764
660 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 765 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
661 total = avg_stats(&runtime_cycles_stats[cpu]); 766 total = avg_stats(&runtime_cycles_stats[cpu]);
662
663 if (total) 767 if (total)
664 ratio = avg / total; 768 ratio = avg / total;
665 769
@@ -753,12 +857,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
753 * Print out the results of a single counter: 857 * Print out the results of a single counter:
754 * aggregated counts in system-wide mode 858 * aggregated counts in system-wide mode
755 */ 859 */
756static void print_counter_aggr(struct perf_evsel *counter) 860static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
757{ 861{
758 struct perf_stat *ps = counter->priv; 862 struct perf_stat *ps = counter->priv;
759 double avg = avg_stats(&ps->res_stats[0]); 863 double avg = avg_stats(&ps->res_stats[0]);
760 int scaled = counter->counts->scaled; 864 int scaled = counter->counts->scaled;
761 865
866 if (prefix)
867 fprintf(output, "%s", prefix);
868
762 if (scaled == -1) { 869 if (scaled == -1) {
763 fprintf(output, "%*s%s%*s", 870 fprintf(output, "%*s%s%*s",
764 csv_output ? 0 : 18, 871 csv_output ? 0 : 18,
@@ -801,7 +908,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
801 * Print out the results of a single counter: 908 * Print out the results of a single counter:
802 * does not use aggregated count in system-wide 909 * does not use aggregated count in system-wide
803 */ 910 */
804static void print_counter(struct perf_evsel *counter) 911static void print_counter(struct perf_evsel *counter, char *prefix)
805{ 912{
806 u64 ena, run, val; 913 u64 ena, run, val;
807 int cpu; 914 int cpu;
@@ -810,6 +917,10 @@ static void print_counter(struct perf_evsel *counter)
810 val = counter->counts->cpu[cpu].val; 917 val = counter->counts->cpu[cpu].val;
811 ena = counter->counts->cpu[cpu].ena; 918 ena = counter->counts->cpu[cpu].ena;
812 run = counter->counts->cpu[cpu].run; 919 run = counter->counts->cpu[cpu].run;
920
921 if (prefix)
922 fprintf(output, "%s", prefix);
923
813 if (run == 0 || ena == 0) { 924 if (run == 0 || ena == 0) {
814 fprintf(output, "CPU%*d%s%*s%s%*s", 925 fprintf(output, "CPU%*d%s%*s%s%*s",
815 csv_output ? 0 : -4, 926 csv_output ? 0 : -4,
@@ -871,10 +982,10 @@ static void print_stat(int argc, const char **argv)
871 982
872 if (no_aggr) { 983 if (no_aggr) {
873 list_for_each_entry(counter, &evsel_list->entries, node) 984 list_for_each_entry(counter, &evsel_list->entries, node)
874 print_counter(counter); 985 print_counter(counter, NULL);
875 } else { 986 } else {
876 list_for_each_entry(counter, &evsel_list->entries, node) 987 list_for_each_entry(counter, &evsel_list->entries, node)
877 print_counter_aggr(counter); 988 print_counter_aggr(counter, NULL);
878 } 989 }
879 990
880 if (!csv_output) { 991 if (!csv_output) {
@@ -895,7 +1006,7 @@ static volatile int signr = -1;
895 1006
896static void skip_signal(int signo) 1007static void skip_signal(int signo)
897{ 1008{
898 if(child_pid == -1) 1009 if ((child_pid == -1) || interval)
899 done = 1; 1010 done = 1;
900 1011
901 signr = signo; 1012 signr = signo;
@@ -1115,6 +1226,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1115 "command to run prior to the measured command"), 1226 "command to run prior to the measured command"),
1116 OPT_STRING(0, "post", &post_cmd, "command", 1227 OPT_STRING(0, "post", &post_cmd, "command",
1117 "command to run after to the measured command"), 1228 "command to run after to the measured command"),
1229 OPT_UINTEGER('I', "interval-print", &interval,
1230 "print counts at regular interval in ms (>= 100)"),
1118 OPT_END() 1231 OPT_END()
1119 }; 1232 };
1120 const char * const stat_usage[] = { 1233 const char * const stat_usage[] = {
@@ -1215,12 +1328,23 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1215 usage_with_options(stat_usage, options); 1328 usage_with_options(stat_usage, options);
1216 return -1; 1329 return -1;
1217 } 1330 }
1331 if (interval && interval < 100) {
1332 pr_err("print interval must be >= 100ms\n");
1333 usage_with_options(stat_usage, options);
1334 return -1;
1335 }
1218 1336
1219 list_for_each_entry(pos, &evsel_list->entries, node) { 1337 list_for_each_entry(pos, &evsel_list->entries, node) {
1220 if (perf_evsel__alloc_stat_priv(pos) < 0 || 1338 if (perf_evsel__alloc_stat_priv(pos) < 0 ||
1221 perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0) 1339 perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
1222 goto out_free_fd; 1340 goto out_free_fd;
1223 } 1341 }
1342 if (interval) {
1343 list_for_each_entry(pos, &evsel_list->entries, node) {
1344 if (perf_evsel__alloc_prev_raw_counts(pos) < 0)
1345 goto out_free_fd;
1346 }
1347 }
1224 1348
1225 /* 1349 /*
1226 * We dont want to block the signals - that would cause 1350 * We dont want to block the signals - that would cause
@@ -1230,6 +1354,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1230 */ 1354 */
1231 atexit(sig_atexit); 1355 atexit(sig_atexit);
1232 signal(SIGINT, skip_signal); 1356 signal(SIGINT, skip_signal);
1357 signal(SIGCHLD, skip_signal);
1233 signal(SIGALRM, skip_signal); 1358 signal(SIGALRM, skip_signal);
1234 signal(SIGABRT, skip_signal); 1359 signal(SIGABRT, skip_signal);
1235 1360
@@ -1242,11 +1367,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1242 status = run_perf_stat(argc, argv); 1367 status = run_perf_stat(argc, argv);
1243 } 1368 }
1244 1369
1245 if (status != -1) 1370 if (status != -1 && !interval)
1246 print_stat(argc, argv); 1371 print_stat(argc, argv);
1247out_free_fd: 1372out_free_fd:
1248 list_for_each_entry(pos, &evsel_list->entries, node) 1373 list_for_each_entry(pos, &evsel_list->entries, node) {
1249 perf_evsel__free_stat_priv(pos); 1374 perf_evsel__free_stat_priv(pos);
1375 perf_evsel__free_prev_raw_counts(pos);
1376 }
1250 perf_evlist__delete_maps(evsel_list); 1377 perf_evlist__delete_maps(evsel_list);
1251out: 1378out:
1252 perf_evlist__delete(evsel_list); 1379 perf_evlist__delete(evsel_list);