aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c398
1 files changed, 232 insertions, 166 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 99848761f573..7e910bab1097 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -68,7 +68,7 @@
68static void print_stat(int argc, const char **argv); 68static void print_stat(int argc, const char **argv);
69static void print_counter_aggr(struct perf_evsel *counter, char *prefix); 69static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70static void print_counter(struct perf_evsel *counter, char *prefix); 70static void print_counter(struct perf_evsel *counter, char *prefix);
71static void print_aggr_socket(char *prefix); 71static void print_aggr(char *prefix);
72 72
73static struct perf_evlist *evsel_list; 73static struct perf_evlist *evsel_list;
74 74
@@ -76,11 +76,17 @@ static struct perf_target target = {
76 .uid = UINT_MAX, 76 .uid = UINT_MAX,
77}; 77};
78 78
79enum aggr_mode {
80 AGGR_NONE,
81 AGGR_GLOBAL,
82 AGGR_SOCKET,
83 AGGR_CORE,
84};
85
79static int run_count = 1; 86static int run_count = 1;
80static bool no_inherit = false; 87static bool no_inherit = false;
81static bool scale = true; 88static bool scale = true;
82static bool no_aggr = false; 89static enum aggr_mode aggr_mode = AGGR_GLOBAL;
83static bool aggr_socket = false;
84static pid_t child_pid = -1; 90static pid_t child_pid = -1;
85static bool null_run = false; 91static bool null_run = false;
86static int detailed_run = 0; 92static int detailed_run = 0;
@@ -94,8 +100,10 @@ static const char *pre_cmd = NULL;
94static const char *post_cmd = NULL; 100static const char *post_cmd = NULL;
95static bool sync_run = false; 101static bool sync_run = false;
96static unsigned int interval = 0; 102static unsigned int interval = 0;
103static bool forever = false;
97static struct timespec ref_time; 104static struct timespec ref_time;
98static struct cpu_map *sock_map; 105static struct cpu_map *aggr_map;
106static int (*aggr_get_id)(struct cpu_map *m, int cpu);
99 107
100static volatile int done = 0; 108static volatile int done = 0;
101 109
@@ -125,6 +133,11 @@ static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
125 return perf_evsel__cpus(evsel)->nr; 133 return perf_evsel__cpus(evsel)->nr;
126} 134}
127 135
136static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
137{
138 memset(evsel->priv, 0, sizeof(struct perf_stat));
139}
140
128static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) 141static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
129{ 142{
130 evsel->priv = zalloc(sizeof(struct perf_stat)); 143 evsel->priv = zalloc(sizeof(struct perf_stat));
@@ -160,6 +173,35 @@ static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
160 evsel->prev_raw_counts = NULL; 173 evsel->prev_raw_counts = NULL;
161} 174}
162 175
176static void perf_evlist__free_stats(struct perf_evlist *evlist)
177{
178 struct perf_evsel *evsel;
179
180 list_for_each_entry(evsel, &evlist->entries, node) {
181 perf_evsel__free_stat_priv(evsel);
182 perf_evsel__free_counts(evsel);
183 perf_evsel__free_prev_raw_counts(evsel);
184 }
185}
186
187static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
188{
189 struct perf_evsel *evsel;
190
191 list_for_each_entry(evsel, &evlist->entries, node) {
192 if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
193 perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
194 (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
195 goto out_free;
196 }
197
198 return 0;
199
200out_free:
201 perf_evlist__free_stats(evlist);
202 return -1;
203}
204
163static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 205static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
164static struct stats runtime_cycles_stats[MAX_NR_CPUS]; 206static struct stats runtime_cycles_stats[MAX_NR_CPUS];
165static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; 207static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
@@ -173,6 +215,29 @@ static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
173static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; 215static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
174static struct stats walltime_nsecs_stats; 216static struct stats walltime_nsecs_stats;
175 217
218static void perf_stat__reset_stats(struct perf_evlist *evlist)
219{
220 struct perf_evsel *evsel;
221
222 list_for_each_entry(evsel, &evlist->entries, node) {
223 perf_evsel__reset_stat_priv(evsel);
224 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
225 }
226
227 memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
228 memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
229 memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
230 memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
231 memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
232 memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
233 memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
234 memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
235 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
236 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
237 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
238 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
239}
240
176static int create_perf_stat_counter(struct perf_evsel *evsel) 241static int create_perf_stat_counter(struct perf_evsel *evsel)
177{ 242{
178 struct perf_event_attr *attr = &evsel->attr; 243 struct perf_event_attr *attr = &evsel->attr;
@@ -249,7 +314,7 @@ static int read_counter_aggr(struct perf_evsel *counter)
249 int i; 314 int i;
250 315
251 if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter), 316 if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
252 evsel_list->threads->nr, scale) < 0) 317 thread_map__nr(evsel_list->threads), scale) < 0)
253 return -1; 318 return -1;
254 319
255 for (i = 0; i < 3; i++) 320 for (i = 0; i < 3; i++)
@@ -297,56 +362,68 @@ static void print_interval(void)
297 struct timespec ts, rs; 362 struct timespec ts, rs;
298 char prefix[64]; 363 char prefix[64];
299 364
300 if (no_aggr) { 365 if (aggr_mode == AGGR_GLOBAL) {
301 list_for_each_entry(counter, &evsel_list->entries, node) { 366 list_for_each_entry(counter, &evsel_list->entries, node) {
302 ps = counter->priv; 367 ps = counter->priv;
303 memset(ps->res_stats, 0, sizeof(ps->res_stats)); 368 memset(ps->res_stats, 0, sizeof(ps->res_stats));
304 read_counter(counter); 369 read_counter_aggr(counter);
305 } 370 }
306 } else { 371 } else {
307 list_for_each_entry(counter, &evsel_list->entries, node) { 372 list_for_each_entry(counter, &evsel_list->entries, node) {
308 ps = counter->priv; 373 ps = counter->priv;
309 memset(ps->res_stats, 0, sizeof(ps->res_stats)); 374 memset(ps->res_stats, 0, sizeof(ps->res_stats));
310 read_counter_aggr(counter); 375 read_counter(counter);
311 } 376 }
312 } 377 }
378
313 clock_gettime(CLOCK_MONOTONIC, &ts); 379 clock_gettime(CLOCK_MONOTONIC, &ts);
314 diff_timespec(&rs, &ts, &ref_time); 380 diff_timespec(&rs, &ts, &ref_time);
315 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); 381 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
316 382
317 if (num_print_interval == 0 && !csv_output) { 383 if (num_print_interval == 0 && !csv_output) {
318 if (aggr_socket) 384 switch (aggr_mode) {
385 case AGGR_SOCKET:
319 fprintf(output, "# time socket cpus counts events\n"); 386 fprintf(output, "# time socket cpus counts events\n");
320 else if (no_aggr) 387 break;
388 case AGGR_CORE:
389 fprintf(output, "# time core cpus counts events\n");
390 break;
391 case AGGR_NONE:
321 fprintf(output, "# time CPU counts events\n"); 392 fprintf(output, "# time CPU counts events\n");
322 else 393 break;
394 case AGGR_GLOBAL:
395 default:
323 fprintf(output, "# time counts events\n"); 396 fprintf(output, "# time counts events\n");
397 }
324 } 398 }
325 399
326 if (++num_print_interval == 25) 400 if (++num_print_interval == 25)
327 num_print_interval = 0; 401 num_print_interval = 0;
328 402
329 if (aggr_socket) 403 switch (aggr_mode) {
330 print_aggr_socket(prefix); 404 case AGGR_CORE:
331 else if (no_aggr) { 405 case AGGR_SOCKET:
406 print_aggr(prefix);
407 break;
408 case AGGR_NONE:
332 list_for_each_entry(counter, &evsel_list->entries, node) 409 list_for_each_entry(counter, &evsel_list->entries, node)
333 print_counter(counter, prefix); 410 print_counter(counter, prefix);
334 } else { 411 break;
412 case AGGR_GLOBAL:
413 default:
335 list_for_each_entry(counter, &evsel_list->entries, node) 414 list_for_each_entry(counter, &evsel_list->entries, node)
336 print_counter_aggr(counter, prefix); 415 print_counter_aggr(counter, prefix);
337 } 416 }
338} 417}
339 418
340static int __run_perf_stat(int argc __maybe_unused, const char **argv) 419static int __run_perf_stat(int argc, const char **argv)
341{ 420{
342 char msg[512]; 421 char msg[512];
343 unsigned long long t0, t1; 422 unsigned long long t0, t1;
344 struct perf_evsel *counter; 423 struct perf_evsel *counter;
345 struct timespec ts; 424 struct timespec ts;
346 int status = 0; 425 int status = 0;
347 int child_ready_pipe[2], go_pipe[2];
348 const bool forks = (argc > 0); 426 const bool forks = (argc > 0);
349 char buf;
350 427
351 if (interval) { 428 if (interval) {
352 ts.tv_sec = interval / 1000; 429 ts.tv_sec = interval / 1000;
@@ -356,61 +433,12 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
356 ts.tv_nsec = 0; 433 ts.tv_nsec = 0;
357 } 434 }
358 435
359 if (aggr_socket
360 && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
361 perror("cannot build socket map");
362 return -1;
363 }
364
365 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
366 perror("failed to create pipes");
367 return -1;
368 }
369
370 if (forks) { 436 if (forks) {
371 if ((child_pid = fork()) < 0) 437 if (perf_evlist__prepare_workload(evsel_list, &target, argv,
372 perror("failed to fork"); 438 false, false) < 0) {
373 439 perror("failed to prepare workload");
374 if (!child_pid) { 440 return -1;
375 close(child_ready_pipe[0]);
376 close(go_pipe[1]);
377 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
378
379 /*
380 * Do a dummy execvp to get the PLT entry resolved,
381 * so we avoid the resolver overhead on the real
382 * execvp call.
383 */
384 execvp("", (char **)argv);
385
386 /*
387 * Tell the parent we're ready to go
388 */
389 close(child_ready_pipe[1]);
390
391 /*
392 * Wait until the parent tells us to go.
393 */
394 if (read(go_pipe[0], &buf, 1) == -1)
395 perror("unable to read pipe");
396
397 execvp(argv[0], (char **)argv);
398
399 perror(argv[0]);
400 exit(-1);
401 } 441 }
402
403 if (perf_target__none(&target))
404 evsel_list->threads->map[0] = child_pid;
405
406 /*
407 * Wait for the child to be ready to exec.
408 */
409 close(child_ready_pipe[1]);
410 close(go_pipe[0]);
411 if (read(child_ready_pipe[0], &buf, 1) == -1)
412 perror("unable to read pipe");
413 close(child_ready_pipe[0]);
414 } 442 }
415 443
416 if (group) 444 if (group)
@@ -457,7 +485,8 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
457 clock_gettime(CLOCK_MONOTONIC, &ref_time); 485 clock_gettime(CLOCK_MONOTONIC, &ref_time);
458 486
459 if (forks) { 487 if (forks) {
460 close(go_pipe[1]); 488 perf_evlist__start_workload(evsel_list);
489
461 if (interval) { 490 if (interval) {
462 while (!waitpid(child_pid, &status, WNOHANG)) { 491 while (!waitpid(child_pid, &status, WNOHANG)) {
463 nanosleep(&ts, NULL); 492 nanosleep(&ts, NULL);
@@ -479,16 +508,16 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
479 508
480 update_stats(&walltime_nsecs_stats, t1 - t0); 509 update_stats(&walltime_nsecs_stats, t1 - t0);
481 510
482 if (no_aggr) { 511 if (aggr_mode == AGGR_GLOBAL) {
483 list_for_each_entry(counter, &evsel_list->entries, node) { 512 list_for_each_entry(counter, &evsel_list->entries, node) {
484 read_counter(counter); 513 read_counter_aggr(counter);
485 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); 514 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
515 thread_map__nr(evsel_list->threads));
486 } 516 }
487 } else { 517 } else {
488 list_for_each_entry(counter, &evsel_list->entries, node) { 518 list_for_each_entry(counter, &evsel_list->entries, node) {
489 read_counter_aggr(counter); 519 read_counter(counter);
490 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 520 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
491 evsel_list->threads->nr);
492 } 521 }
493 } 522 }
494 523
@@ -542,26 +571,47 @@ static void print_noise(struct perf_evsel *evsel, double avg)
542 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 571 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
543} 572}
544 573
545static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 574static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
546{ 575{
547 double msecs = avg / 1e6; 576 switch (aggr_mode) {
548 char cpustr[16] = { '\0', }; 577 case AGGR_CORE:
549 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; 578 fprintf(output, "S%d-C%*d%s%*d%s",
550 579 cpu_map__id_to_socket(id),
551 if (aggr_socket) 580 csv_output ? 0 : -8,
552 sprintf(cpustr, "S%*d%s%*d%s", 581 cpu_map__id_to_cpu(id),
582 csv_sep,
583 csv_output ? 0 : 4,
584 nr,
585 csv_sep);
586 break;
587 case AGGR_SOCKET:
588 fprintf(output, "S%*d%s%*d%s",
553 csv_output ? 0 : -5, 589 csv_output ? 0 : -5,
554 cpu, 590 id,
555 csv_sep, 591 csv_sep,
556 csv_output ? 0 : 4, 592 csv_output ? 0 : 4,
557 nr, 593 nr,
558 csv_sep); 594 csv_sep);
559 else if (no_aggr) 595 break;
560 sprintf(cpustr, "CPU%*d%s", 596 case AGGR_NONE:
597 fprintf(output, "CPU%*d%s",
561 csv_output ? 0 : -4, 598 csv_output ? 0 : -4,
562 perf_evsel__cpus(evsel)->map[cpu], csv_sep); 599 perf_evsel__cpus(evsel)->map[id], csv_sep);
600 break;
601 case AGGR_GLOBAL:
602 default:
603 break;
604 }
605}
606
607static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
608{
609 double msecs = avg / 1e6;
610 const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
611
612 aggr_printout(evsel, cpu, nr);
563 613
564 fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel)); 614 fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
565 615
566 if (evsel->cgrp) 616 if (evsel->cgrp)
567 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 617 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -758,32 +808,21 @@ static void print_ll_cache_misses(int cpu,
758static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 808static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
759{ 809{
760 double total, ratio = 0.0; 810 double total, ratio = 0.0;
761 char cpustr[16] = { '\0', };
762 const char *fmt; 811 const char *fmt;
763 812
764 if (csv_output) 813 if (csv_output)
765 fmt = "%s%.0f%s%s"; 814 fmt = "%.0f%s%s";
766 else if (big_num) 815 else if (big_num)
767 fmt = "%s%'18.0f%s%-25s"; 816 fmt = "%'18.0f%s%-25s";
768 else 817 else
769 fmt = "%s%18.0f%s%-25s"; 818 fmt = "%18.0f%s%-25s";
770 819
771 if (aggr_socket) 820 aggr_printout(evsel, cpu, nr);
772 sprintf(cpustr, "S%*d%s%*d%s", 821
773 csv_output ? 0 : -5, 822 if (aggr_mode == AGGR_GLOBAL)
774 cpu,
775 csv_sep,
776 csv_output ? 0 : 4,
777 nr,
778 csv_sep);
779 else if (no_aggr)
780 sprintf(cpustr, "CPU%*d%s",
781 csv_output ? 0 : -4,
782 perf_evsel__cpus(evsel)->map[cpu], csv_sep);
783 else
784 cpu = 0; 823 cpu = 0;
785 824
786 fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel)); 825 fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));
787 826
788 if (evsel->cgrp) 827 if (evsel->cgrp)
789 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 828 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -882,23 +921,23 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
882 } 921 }
883} 922}
884 923
885static void print_aggr_socket(char *prefix) 924static void print_aggr(char *prefix)
886{ 925{
887 struct perf_evsel *counter; 926 struct perf_evsel *counter;
927 int cpu, s, s2, id, nr;
888 u64 ena, run, val; 928 u64 ena, run, val;
889 int cpu, s, s2, sock, nr;
890 929
891 if (!sock_map) 930 if (!(aggr_map || aggr_get_id))
892 return; 931 return;
893 932
894 for (s = 0; s < sock_map->nr; s++) { 933 for (s = 0; s < aggr_map->nr; s++) {
895 sock = cpu_map__socket(sock_map, s); 934 id = aggr_map->map[s];
896 list_for_each_entry(counter, &evsel_list->entries, node) { 935 list_for_each_entry(counter, &evsel_list->entries, node) {
897 val = ena = run = 0; 936 val = ena = run = 0;
898 nr = 0; 937 nr = 0;
899 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 938 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
900 s2 = cpu_map__get_socket(evsel_list->cpus, cpu); 939 s2 = aggr_get_id(evsel_list->cpus, cpu);
901 if (s2 != sock) 940 if (s2 != id)
902 continue; 941 continue;
903 val += counter->counts->cpu[cpu].val; 942 val += counter->counts->cpu[cpu].val;
904 ena += counter->counts->cpu[cpu].ena; 943 ena += counter->counts->cpu[cpu].ena;
@@ -909,18 +948,15 @@ static void print_aggr_socket(char *prefix)
909 fprintf(output, "%s", prefix); 948 fprintf(output, "%s", prefix);
910 949
911 if (run == 0 || ena == 0) { 950 if (run == 0 || ena == 0) {
912 fprintf(output, "S%*d%s%*d%s%*s%s%*s", 951 aggr_printout(counter, cpu, nr);
913 csv_output ? 0 : -5, 952
914 s, 953 fprintf(output, "%*s%s%*s",
915 csv_sep,
916 csv_output ? 0 : 4,
917 nr,
918 csv_sep,
919 csv_output ? 0 : 18, 954 csv_output ? 0 : 18,
920 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 955 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
921 csv_sep, 956 csv_sep,
922 csv_output ? 0 : -24, 957 csv_output ? 0 : -24,
923 perf_evsel__name(counter)); 958 perf_evsel__name(counter));
959
924 if (counter->cgrp) 960 if (counter->cgrp)
925 fprintf(output, "%s%s", 961 fprintf(output, "%s%s",
926 csv_sep, counter->cgrp->name); 962 csv_sep, counter->cgrp->name);
@@ -930,9 +966,9 @@ static void print_aggr_socket(char *prefix)
930 } 966 }
931 967
932 if (nsec_counter(counter)) 968 if (nsec_counter(counter))
933 nsec_printout(sock, nr, counter, val); 969 nsec_printout(id, nr, counter, val);
934 else 970 else
935 abs_printout(sock, nr, counter, val); 971 abs_printout(id, nr, counter, val);
936 972
937 if (!csv_output) { 973 if (!csv_output) {
938 print_noise(counter, 1.0); 974 print_noise(counter, 1.0);
@@ -1073,14 +1109,21 @@ static void print_stat(int argc, const char **argv)
1073 fprintf(output, ":\n\n"); 1109 fprintf(output, ":\n\n");
1074 } 1110 }
1075 1111
1076 if (aggr_socket) 1112 switch (aggr_mode) {
1077 print_aggr_socket(NULL); 1113 case AGGR_CORE:
1078 else if (no_aggr) { 1114 case AGGR_SOCKET:
1079 list_for_each_entry(counter, &evsel_list->entries, node) 1115 print_aggr(NULL);
1080 print_counter(counter, NULL); 1116 break;
1081 } else { 1117 case AGGR_GLOBAL:
1082 list_for_each_entry(counter, &evsel_list->entries, node) 1118 list_for_each_entry(counter, &evsel_list->entries, node)
1083 print_counter_aggr(counter, NULL); 1119 print_counter_aggr(counter, NULL);
1120 break;
1121 case AGGR_NONE:
1122 list_for_each_entry(counter, &evsel_list->entries, node)
1123 print_counter(counter, NULL);
1124 break;
1125 default:
1126 break;
1084 } 1127 }
1085 1128
1086 if (!csv_output) { 1129 if (!csv_output) {
@@ -1126,6 +1169,32 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
1126 return 0; 1169 return 0;
1127} 1170}
1128 1171
1172static int perf_stat_init_aggr_mode(void)
1173{
1174 switch (aggr_mode) {
1175 case AGGR_SOCKET:
1176 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1177 perror("cannot build socket map");
1178 return -1;
1179 }
1180 aggr_get_id = cpu_map__get_socket;
1181 break;
1182 case AGGR_CORE:
1183 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1184 perror("cannot build core map");
1185 return -1;
1186 }
1187 aggr_get_id = cpu_map__get_core;
1188 break;
1189 case AGGR_NONE:
1190 case AGGR_GLOBAL:
1191 default:
1192 break;
1193 }
1194 return 0;
1195}
1196
1197
1129/* 1198/*
1130 * Add default attributes, if there were no attributes specified or 1199 * Add default attributes, if there were no attributes specified or
1131 * if -d/--detailed, -d -d or -d -d -d is used: 1200 * if -d/--detailed, -d -d or -d -d -d is used:
@@ -1296,7 +1365,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1296 OPT_INCR('v', "verbose", &verbose, 1365 OPT_INCR('v', "verbose", &verbose,
1297 "be more verbose (show counter open errors, etc)"), 1366 "be more verbose (show counter open errors, etc)"),
1298 OPT_INTEGER('r', "repeat", &run_count, 1367 OPT_INTEGER('r', "repeat", &run_count,
1299 "repeat command and print average + stddev (max: 100)"), 1368 "repeat command and print average + stddev (max: 100, forever: 0)"),
1300 OPT_BOOLEAN('n', "null", &null_run, 1369 OPT_BOOLEAN('n', "null", &null_run,
1301 "null run - dont start any counters"), 1370 "null run - dont start any counters"),
1302 OPT_INCR('d', "detailed", &detailed_run, 1371 OPT_INCR('d', "detailed", &detailed_run,
@@ -1308,7 +1377,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1308 stat__set_big_num), 1377 stat__set_big_num),
1309 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1378 OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1310 "list of cpus to monitor in system-wide"), 1379 "list of cpus to monitor in system-wide"),
1311 OPT_BOOLEAN('A', "no-aggr", &no_aggr, "disable CPU count aggregation"), 1380 OPT_SET_UINT('A', "no-aggr", &aggr_mode,
1381 "disable CPU count aggregation", AGGR_NONE),
1312 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1382 OPT_STRING('x', "field-separator", &csv_sep, "separator",
1313 "print counts with custom separator"), 1383 "print counts with custom separator"),
1314 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1384 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
@@ -1323,20 +1393,22 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1323 "command to run after to the measured command"), 1393 "command to run after to the measured command"),
1324 OPT_UINTEGER('I', "interval-print", &interval, 1394 OPT_UINTEGER('I', "interval-print", &interval,
1325 "print counts at regular interval in ms (>= 100)"), 1395 "print counts at regular interval in ms (>= 100)"),
1326 OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"), 1396 OPT_SET_UINT(0, "per-socket", &aggr_mode,
1397 "aggregate counts per processor socket", AGGR_SOCKET),
1398 OPT_SET_UINT(0, "per-core", &aggr_mode,
1399 "aggregate counts per physical processor core", AGGR_CORE),
1327 OPT_END() 1400 OPT_END()
1328 }; 1401 };
1329 const char * const stat_usage[] = { 1402 const char * const stat_usage[] = {
1330 "perf stat [<options>] [<command>]", 1403 "perf stat [<options>] [<command>]",
1331 NULL 1404 NULL
1332 }; 1405 };
1333 struct perf_evsel *pos;
1334 int status = -ENOMEM, run_idx; 1406 int status = -ENOMEM, run_idx;
1335 const char *mode; 1407 const char *mode;
1336 1408
1337 setlocale(LC_ALL, ""); 1409 setlocale(LC_ALL, "");
1338 1410
1339 evsel_list = perf_evlist__new(NULL, NULL); 1411 evsel_list = perf_evlist__new();
1340 if (evsel_list == NULL) 1412 if (evsel_list == NULL)
1341 return -ENOMEM; 1413 return -ENOMEM;
1342 1414
@@ -1399,23 +1471,21 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1399 1471
1400 if (!argc && !perf_target__has_task(&target)) 1472 if (!argc && !perf_target__has_task(&target))
1401 usage_with_options(stat_usage, options); 1473 usage_with_options(stat_usage, options);
1402 if (run_count <= 0) 1474 if (run_count < 0) {
1403 usage_with_options(stat_usage, options); 1475 usage_with_options(stat_usage, options);
1476 } else if (run_count == 0) {
1477 forever = true;
1478 run_count = 1;
1479 }
1404 1480
1405 /* no_aggr, cgroup are for system-wide only */ 1481 /* no_aggr, cgroup are for system-wide only */
1406 if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) { 1482 if ((aggr_mode != AGGR_GLOBAL || nr_cgroups)
1483 && !perf_target__has_cpu(&target)) {
1407 fprintf(stderr, "both cgroup and no-aggregation " 1484 fprintf(stderr, "both cgroup and no-aggregation "
1408 "modes only available in system-wide mode\n"); 1485 "modes only available in system-wide mode\n");
1409 1486
1410 usage_with_options(stat_usage, options); 1487 usage_with_options(stat_usage, options);
1411 } 1488 return -1;
1412
1413 if (aggr_socket) {
1414 if (!perf_target__has_cpu(&target)) {
1415 fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
1416 usage_with_options(stat_usage, options);
1417 }
1418 no_aggr = true;
1419 } 1489 }
1420 1490
1421 if (add_default_attributes()) 1491 if (add_default_attributes())
@@ -1438,17 +1508,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1438 return -1; 1508 return -1;
1439 } 1509 }
1440 1510
1441 list_for_each_entry(pos, &evsel_list->entries, node) { 1511 if (perf_evlist__alloc_stats(evsel_list, interval))
1442 if (perf_evsel__alloc_stat_priv(pos) < 0 || 1512 goto out_free_maps;
1443 perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0) 1513
1444 goto out_free_fd; 1514 if (perf_stat_init_aggr_mode())
1445 } 1515 goto out;
1446 if (interval) {
1447 list_for_each_entry(pos, &evsel_list->entries, node) {
1448 if (perf_evsel__alloc_prev_raw_counts(pos) < 0)
1449 goto out_free_fd;
1450 }
1451 }
1452 1516
1453 /* 1517 /*
1454 * We dont want to block the signals - that would cause 1518 * We dont want to block the signals - that would cause
@@ -1457,28 +1521,30 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1457 * task, but being ignored by perf stat itself: 1521 * task, but being ignored by perf stat itself:
1458 */ 1522 */
1459 atexit(sig_atexit); 1523 atexit(sig_atexit);
1460 signal(SIGINT, skip_signal); 1524 if (!forever)
1525 signal(SIGINT, skip_signal);
1461 signal(SIGCHLD, skip_signal); 1526 signal(SIGCHLD, skip_signal);
1462 signal(SIGALRM, skip_signal); 1527 signal(SIGALRM, skip_signal);
1463 signal(SIGABRT, skip_signal); 1528 signal(SIGABRT, skip_signal);
1464 1529
1465 status = 0; 1530 status = 0;
1466 for (run_idx = 0; run_idx < run_count; run_idx++) { 1531 for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
1467 if (run_count != 1 && verbose) 1532 if (run_count != 1 && verbose)
1468 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 1533 fprintf(output, "[ perf stat: executing run #%d ... ]\n",
1469 run_idx + 1); 1534 run_idx + 1);
1470 1535
1471 status = run_perf_stat(argc, argv); 1536 status = run_perf_stat(argc, argv);
1537 if (forever && status != -1) {
1538 print_stat(argc, argv);
1539 perf_stat__reset_stats(evsel_list);
1540 }
1472 } 1541 }
1473 1542
1474 if (status != -1 && !interval) 1543 if (!forever && status != -1 && !interval)
1475 print_stat(argc, argv); 1544 print_stat(argc, argv);
1476out_free_fd: 1545
1477 list_for_each_entry(pos, &evsel_list->entries, node) { 1546 perf_evlist__free_stats(evsel_list);
1478 perf_evsel__free_stat_priv(pos); 1547out_free_maps:
1479 perf_evsel__free_counts(pos);
1480 perf_evsel__free_prev_raw_counts(pos);
1481 }
1482 perf_evlist__delete_maps(evsel_list); 1548 perf_evlist__delete_maps(evsel_list);
1483out: 1549out:
1484 perf_evlist__delete(evsel_list); 1550 perf_evlist__delete(evsel_list);