diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 398 |
1 files changed, 232 insertions, 166 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 99848761f573..7e910bab1097 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -68,7 +68,7 @@ | |||
68 | static void print_stat(int argc, const char **argv); | 68 | static void print_stat(int argc, const char **argv); |
69 | static void print_counter_aggr(struct perf_evsel *counter, char *prefix); | 69 | static void print_counter_aggr(struct perf_evsel *counter, char *prefix); |
70 | static void print_counter(struct perf_evsel *counter, char *prefix); | 70 | static void print_counter(struct perf_evsel *counter, char *prefix); |
71 | static void print_aggr_socket(char *prefix); | 71 | static void print_aggr(char *prefix); |
72 | 72 | ||
73 | static struct perf_evlist *evsel_list; | 73 | static struct perf_evlist *evsel_list; |
74 | 74 | ||
@@ -76,11 +76,17 @@ static struct perf_target target = { | |||
76 | .uid = UINT_MAX, | 76 | .uid = UINT_MAX, |
77 | }; | 77 | }; |
78 | 78 | ||
79 | enum aggr_mode { | ||
80 | AGGR_NONE, | ||
81 | AGGR_GLOBAL, | ||
82 | AGGR_SOCKET, | ||
83 | AGGR_CORE, | ||
84 | }; | ||
85 | |||
79 | static int run_count = 1; | 86 | static int run_count = 1; |
80 | static bool no_inherit = false; | 87 | static bool no_inherit = false; |
81 | static bool scale = true; | 88 | static bool scale = true; |
82 | static bool no_aggr = false; | 89 | static enum aggr_mode aggr_mode = AGGR_GLOBAL; |
83 | static bool aggr_socket = false; | ||
84 | static pid_t child_pid = -1; | 90 | static pid_t child_pid = -1; |
85 | static bool null_run = false; | 91 | static bool null_run = false; |
86 | static int detailed_run = 0; | 92 | static int detailed_run = 0; |
@@ -94,8 +100,10 @@ static const char *pre_cmd = NULL; | |||
94 | static const char *post_cmd = NULL; | 100 | static const char *post_cmd = NULL; |
95 | static bool sync_run = false; | 101 | static bool sync_run = false; |
96 | static unsigned int interval = 0; | 102 | static unsigned int interval = 0; |
103 | static bool forever = false; | ||
97 | static struct timespec ref_time; | 104 | static struct timespec ref_time; |
98 | static struct cpu_map *sock_map; | 105 | static struct cpu_map *aggr_map; |
106 | static int (*aggr_get_id)(struct cpu_map *m, int cpu); | ||
99 | 107 | ||
100 | static volatile int done = 0; | 108 | static volatile int done = 0; |
101 | 109 | ||
@@ -125,6 +133,11 @@ static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) | |||
125 | return perf_evsel__cpus(evsel)->nr; | 133 | return perf_evsel__cpus(evsel)->nr; |
126 | } | 134 | } |
127 | 135 | ||
136 | static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) | ||
137 | { | ||
138 | memset(evsel->priv, 0, sizeof(struct perf_stat)); | ||
139 | } | ||
140 | |||
128 | static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) | 141 | static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) |
129 | { | 142 | { |
130 | evsel->priv = zalloc(sizeof(struct perf_stat)); | 143 | evsel->priv = zalloc(sizeof(struct perf_stat)); |
@@ -160,6 +173,35 @@ static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) | |||
160 | evsel->prev_raw_counts = NULL; | 173 | evsel->prev_raw_counts = NULL; |
161 | } | 174 | } |
162 | 175 | ||
176 | static void perf_evlist__free_stats(struct perf_evlist *evlist) | ||
177 | { | ||
178 | struct perf_evsel *evsel; | ||
179 | |||
180 | list_for_each_entry(evsel, &evlist->entries, node) { | ||
181 | perf_evsel__free_stat_priv(evsel); | ||
182 | perf_evsel__free_counts(evsel); | ||
183 | perf_evsel__free_prev_raw_counts(evsel); | ||
184 | } | ||
185 | } | ||
186 | |||
187 | static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) | ||
188 | { | ||
189 | struct perf_evsel *evsel; | ||
190 | |||
191 | list_for_each_entry(evsel, &evlist->entries, node) { | ||
192 | if (perf_evsel__alloc_stat_priv(evsel) < 0 || | ||
193 | perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 || | ||
194 | (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0)) | ||
195 | goto out_free; | ||
196 | } | ||
197 | |||
198 | return 0; | ||
199 | |||
200 | out_free: | ||
201 | perf_evlist__free_stats(evlist); | ||
202 | return -1; | ||
203 | } | ||
204 | |||
163 | static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | 205 | static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
164 | static struct stats runtime_cycles_stats[MAX_NR_CPUS]; | 206 | static struct stats runtime_cycles_stats[MAX_NR_CPUS]; |
165 | static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; | 207 | static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; |
@@ -173,6 +215,29 @@ static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; | |||
173 | static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; | 215 | static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; |
174 | static struct stats walltime_nsecs_stats; | 216 | static struct stats walltime_nsecs_stats; |
175 | 217 | ||
218 | static void perf_stat__reset_stats(struct perf_evlist *evlist) | ||
219 | { | ||
220 | struct perf_evsel *evsel; | ||
221 | |||
222 | list_for_each_entry(evsel, &evlist->entries, node) { | ||
223 | perf_evsel__reset_stat_priv(evsel); | ||
224 | perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); | ||
225 | } | ||
226 | |||
227 | memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); | ||
228 | memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); | ||
229 | memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); | ||
230 | memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); | ||
231 | memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); | ||
232 | memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); | ||
233 | memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); | ||
234 | memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); | ||
235 | memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); | ||
236 | memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); | ||
237 | memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); | ||
238 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); | ||
239 | } | ||
240 | |||
176 | static int create_perf_stat_counter(struct perf_evsel *evsel) | 241 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
177 | { | 242 | { |
178 | struct perf_event_attr *attr = &evsel->attr; | 243 | struct perf_event_attr *attr = &evsel->attr; |
@@ -249,7 +314,7 @@ static int read_counter_aggr(struct perf_evsel *counter) | |||
249 | int i; | 314 | int i; |
250 | 315 | ||
251 | if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter), | 316 | if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter), |
252 | evsel_list->threads->nr, scale) < 0) | 317 | thread_map__nr(evsel_list->threads), scale) < 0) |
253 | return -1; | 318 | return -1; |
254 | 319 | ||
255 | for (i = 0; i < 3; i++) | 320 | for (i = 0; i < 3; i++) |
@@ -297,56 +362,68 @@ static void print_interval(void) | |||
297 | struct timespec ts, rs; | 362 | struct timespec ts, rs; |
298 | char prefix[64]; | 363 | char prefix[64]; |
299 | 364 | ||
300 | if (no_aggr) { | 365 | if (aggr_mode == AGGR_GLOBAL) { |
301 | list_for_each_entry(counter, &evsel_list->entries, node) { | 366 | list_for_each_entry(counter, &evsel_list->entries, node) { |
302 | ps = counter->priv; | 367 | ps = counter->priv; |
303 | memset(ps->res_stats, 0, sizeof(ps->res_stats)); | 368 | memset(ps->res_stats, 0, sizeof(ps->res_stats)); |
304 | read_counter(counter); | 369 | read_counter_aggr(counter); |
305 | } | 370 | } |
306 | } else { | 371 | } else { |
307 | list_for_each_entry(counter, &evsel_list->entries, node) { | 372 | list_for_each_entry(counter, &evsel_list->entries, node) { |
308 | ps = counter->priv; | 373 | ps = counter->priv; |
309 | memset(ps->res_stats, 0, sizeof(ps->res_stats)); | 374 | memset(ps->res_stats, 0, sizeof(ps->res_stats)); |
310 | read_counter_aggr(counter); | 375 | read_counter(counter); |
311 | } | 376 | } |
312 | } | 377 | } |
378 | |||
313 | clock_gettime(CLOCK_MONOTONIC, &ts); | 379 | clock_gettime(CLOCK_MONOTONIC, &ts); |
314 | diff_timespec(&rs, &ts, &ref_time); | 380 | diff_timespec(&rs, &ts, &ref_time); |
315 | sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); | 381 | sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); |
316 | 382 | ||
317 | if (num_print_interval == 0 && !csv_output) { | 383 | if (num_print_interval == 0 && !csv_output) { |
318 | if (aggr_socket) | 384 | switch (aggr_mode) { |
385 | case AGGR_SOCKET: | ||
319 | fprintf(output, "# time socket cpus counts events\n"); | 386 | fprintf(output, "# time socket cpus counts events\n"); |
320 | else if (no_aggr) | 387 | break; |
388 | case AGGR_CORE: | ||
389 | fprintf(output, "# time core cpus counts events\n"); | ||
390 | break; | ||
391 | case AGGR_NONE: | ||
321 | fprintf(output, "# time CPU counts events\n"); | 392 | fprintf(output, "# time CPU counts events\n"); |
322 | else | 393 | break; |
394 | case AGGR_GLOBAL: | ||
395 | default: | ||
323 | fprintf(output, "# time counts events\n"); | 396 | fprintf(output, "# time counts events\n"); |
397 | } | ||
324 | } | 398 | } |
325 | 399 | ||
326 | if (++num_print_interval == 25) | 400 | if (++num_print_interval == 25) |
327 | num_print_interval = 0; | 401 | num_print_interval = 0; |
328 | 402 | ||
329 | if (aggr_socket) | 403 | switch (aggr_mode) { |
330 | print_aggr_socket(prefix); | 404 | case AGGR_CORE: |
331 | else if (no_aggr) { | 405 | case AGGR_SOCKET: |
406 | print_aggr(prefix); | ||
407 | break; | ||
408 | case AGGR_NONE: | ||
332 | list_for_each_entry(counter, &evsel_list->entries, node) | 409 | list_for_each_entry(counter, &evsel_list->entries, node) |
333 | print_counter(counter, prefix); | 410 | print_counter(counter, prefix); |
334 | } else { | 411 | break; |
412 | case AGGR_GLOBAL: | ||
413 | default: | ||
335 | list_for_each_entry(counter, &evsel_list->entries, node) | 414 | list_for_each_entry(counter, &evsel_list->entries, node) |
336 | print_counter_aggr(counter, prefix); | 415 | print_counter_aggr(counter, prefix); |
337 | } | 416 | } |
338 | } | 417 | } |
339 | 418 | ||
340 | static int __run_perf_stat(int argc __maybe_unused, const char **argv) | 419 | static int __run_perf_stat(int argc, const char **argv) |
341 | { | 420 | { |
342 | char msg[512]; | 421 | char msg[512]; |
343 | unsigned long long t0, t1; | 422 | unsigned long long t0, t1; |
344 | struct perf_evsel *counter; | 423 | struct perf_evsel *counter; |
345 | struct timespec ts; | 424 | struct timespec ts; |
346 | int status = 0; | 425 | int status = 0; |
347 | int child_ready_pipe[2], go_pipe[2]; | ||
348 | const bool forks = (argc > 0); | 426 | const bool forks = (argc > 0); |
349 | char buf; | ||
350 | 427 | ||
351 | if (interval) { | 428 | if (interval) { |
352 | ts.tv_sec = interval / 1000; | 429 | ts.tv_sec = interval / 1000; |
@@ -356,61 +433,12 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv) | |||
356 | ts.tv_nsec = 0; | 433 | ts.tv_nsec = 0; |
357 | } | 434 | } |
358 | 435 | ||
359 | if (aggr_socket | ||
360 | && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) { | ||
361 | perror("cannot build socket map"); | ||
362 | return -1; | ||
363 | } | ||
364 | |||
365 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { | ||
366 | perror("failed to create pipes"); | ||
367 | return -1; | ||
368 | } | ||
369 | |||
370 | if (forks) { | 436 | if (forks) { |
371 | if ((child_pid = fork()) < 0) | 437 | if (perf_evlist__prepare_workload(evsel_list, &target, argv, |
372 | perror("failed to fork"); | 438 | false, false) < 0) { |
373 | 439 | perror("failed to prepare workload"); | |
374 | if (!child_pid) { | 440 | return -1; |
375 | close(child_ready_pipe[0]); | ||
376 | close(go_pipe[1]); | ||
377 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); | ||
378 | |||
379 | /* | ||
380 | * Do a dummy execvp to get the PLT entry resolved, | ||
381 | * so we avoid the resolver overhead on the real | ||
382 | * execvp call. | ||
383 | */ | ||
384 | execvp("", (char **)argv); | ||
385 | |||
386 | /* | ||
387 | * Tell the parent we're ready to go | ||
388 | */ | ||
389 | close(child_ready_pipe[1]); | ||
390 | |||
391 | /* | ||
392 | * Wait until the parent tells us to go. | ||
393 | */ | ||
394 | if (read(go_pipe[0], &buf, 1) == -1) | ||
395 | perror("unable to read pipe"); | ||
396 | |||
397 | execvp(argv[0], (char **)argv); | ||
398 | |||
399 | perror(argv[0]); | ||
400 | exit(-1); | ||
401 | } | 441 | } |
402 | |||
403 | if (perf_target__none(&target)) | ||
404 | evsel_list->threads->map[0] = child_pid; | ||
405 | |||
406 | /* | ||
407 | * Wait for the child to be ready to exec. | ||
408 | */ | ||
409 | close(child_ready_pipe[1]); | ||
410 | close(go_pipe[0]); | ||
411 | if (read(child_ready_pipe[0], &buf, 1) == -1) | ||
412 | perror("unable to read pipe"); | ||
413 | close(child_ready_pipe[0]); | ||
414 | } | 442 | } |
415 | 443 | ||
416 | if (group) | 444 | if (group) |
@@ -457,7 +485,8 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv) | |||
457 | clock_gettime(CLOCK_MONOTONIC, &ref_time); | 485 | clock_gettime(CLOCK_MONOTONIC, &ref_time); |
458 | 486 | ||
459 | if (forks) { | 487 | if (forks) { |
460 | close(go_pipe[1]); | 488 | perf_evlist__start_workload(evsel_list); |
489 | |||
461 | if (interval) { | 490 | if (interval) { |
462 | while (!waitpid(child_pid, &status, WNOHANG)) { | 491 | while (!waitpid(child_pid, &status, WNOHANG)) { |
463 | nanosleep(&ts, NULL); | 492 | nanosleep(&ts, NULL); |
@@ -479,16 +508,16 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv) | |||
479 | 508 | ||
480 | update_stats(&walltime_nsecs_stats, t1 - t0); | 509 | update_stats(&walltime_nsecs_stats, t1 - t0); |
481 | 510 | ||
482 | if (no_aggr) { | 511 | if (aggr_mode == AGGR_GLOBAL) { |
483 | list_for_each_entry(counter, &evsel_list->entries, node) { | 512 | list_for_each_entry(counter, &evsel_list->entries, node) { |
484 | read_counter(counter); | 513 | read_counter_aggr(counter); |
485 | perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); | 514 | perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), |
515 | thread_map__nr(evsel_list->threads)); | ||
486 | } | 516 | } |
487 | } else { | 517 | } else { |
488 | list_for_each_entry(counter, &evsel_list->entries, node) { | 518 | list_for_each_entry(counter, &evsel_list->entries, node) { |
489 | read_counter_aggr(counter); | 519 | read_counter(counter); |
490 | perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), | 520 | perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); |
491 | evsel_list->threads->nr); | ||
492 | } | 521 | } |
493 | } | 522 | } |
494 | 523 | ||
@@ -542,26 +571,47 @@ static void print_noise(struct perf_evsel *evsel, double avg) | |||
542 | print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); | 571 | print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); |
543 | } | 572 | } |
544 | 573 | ||
545 | static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) | 574 | static void aggr_printout(struct perf_evsel *evsel, int id, int nr) |
546 | { | 575 | { |
547 | double msecs = avg / 1e6; | 576 | switch (aggr_mode) { |
548 | char cpustr[16] = { '\0', }; | 577 | case AGGR_CORE: |
549 | const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; | 578 | fprintf(output, "S%d-C%*d%s%*d%s", |
550 | 579 | cpu_map__id_to_socket(id), | |
551 | if (aggr_socket) | 580 | csv_output ? 0 : -8, |
552 | sprintf(cpustr, "S%*d%s%*d%s", | 581 | cpu_map__id_to_cpu(id), |
582 | csv_sep, | ||
583 | csv_output ? 0 : 4, | ||
584 | nr, | ||
585 | csv_sep); | ||
586 | break; | ||
587 | case AGGR_SOCKET: | ||
588 | fprintf(output, "S%*d%s%*d%s", | ||
553 | csv_output ? 0 : -5, | 589 | csv_output ? 0 : -5, |
554 | cpu, | 590 | id, |
555 | csv_sep, | 591 | csv_sep, |
556 | csv_output ? 0 : 4, | 592 | csv_output ? 0 : 4, |
557 | nr, | 593 | nr, |
558 | csv_sep); | 594 | csv_sep); |
559 | else if (no_aggr) | 595 | break; |
560 | sprintf(cpustr, "CPU%*d%s", | 596 | case AGGR_NONE: |
597 | fprintf(output, "CPU%*d%s", | ||
561 | csv_output ? 0 : -4, | 598 | csv_output ? 0 : -4, |
562 | perf_evsel__cpus(evsel)->map[cpu], csv_sep); | 599 | perf_evsel__cpus(evsel)->map[id], csv_sep); |
600 | break; | ||
601 | case AGGR_GLOBAL: | ||
602 | default: | ||
603 | break; | ||
604 | } | ||
605 | } | ||
606 | |||
607 | static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) | ||
608 | { | ||
609 | double msecs = avg / 1e6; | ||
610 | const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s"; | ||
611 | |||
612 | aggr_printout(evsel, cpu, nr); | ||
563 | 613 | ||
564 | fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel)); | 614 | fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel)); |
565 | 615 | ||
566 | if (evsel->cgrp) | 616 | if (evsel->cgrp) |
567 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); | 617 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); |
@@ -758,32 +808,21 @@ static void print_ll_cache_misses(int cpu, | |||
758 | static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) | 808 | static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) |
759 | { | 809 | { |
760 | double total, ratio = 0.0; | 810 | double total, ratio = 0.0; |
761 | char cpustr[16] = { '\0', }; | ||
762 | const char *fmt; | 811 | const char *fmt; |
763 | 812 | ||
764 | if (csv_output) | 813 | if (csv_output) |
765 | fmt = "%s%.0f%s%s"; | 814 | fmt = "%.0f%s%s"; |
766 | else if (big_num) | 815 | else if (big_num) |
767 | fmt = "%s%'18.0f%s%-25s"; | 816 | fmt = "%'18.0f%s%-25s"; |
768 | else | 817 | else |
769 | fmt = "%s%18.0f%s%-25s"; | 818 | fmt = "%18.0f%s%-25s"; |
770 | 819 | ||
771 | if (aggr_socket) | 820 | aggr_printout(evsel, cpu, nr); |
772 | sprintf(cpustr, "S%*d%s%*d%s", | 821 | |
773 | csv_output ? 0 : -5, | 822 | if (aggr_mode == AGGR_GLOBAL) |
774 | cpu, | ||
775 | csv_sep, | ||
776 | csv_output ? 0 : 4, | ||
777 | nr, | ||
778 | csv_sep); | ||
779 | else if (no_aggr) | ||
780 | sprintf(cpustr, "CPU%*d%s", | ||
781 | csv_output ? 0 : -4, | ||
782 | perf_evsel__cpus(evsel)->map[cpu], csv_sep); | ||
783 | else | ||
784 | cpu = 0; | 823 | cpu = 0; |
785 | 824 | ||
786 | fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel)); | 825 | fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel)); |
787 | 826 | ||
788 | if (evsel->cgrp) | 827 | if (evsel->cgrp) |
789 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); | 828 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); |
@@ -882,23 +921,23 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) | |||
882 | } | 921 | } |
883 | } | 922 | } |
884 | 923 | ||
885 | static void print_aggr_socket(char *prefix) | 924 | static void print_aggr(char *prefix) |
886 | { | 925 | { |
887 | struct perf_evsel *counter; | 926 | struct perf_evsel *counter; |
927 | int cpu, s, s2, id, nr; | ||
888 | u64 ena, run, val; | 928 | u64 ena, run, val; |
889 | int cpu, s, s2, sock, nr; | ||
890 | 929 | ||
891 | if (!sock_map) | 930 | if (!(aggr_map || aggr_get_id)) |
892 | return; | 931 | return; |
893 | 932 | ||
894 | for (s = 0; s < sock_map->nr; s++) { | 933 | for (s = 0; s < aggr_map->nr; s++) { |
895 | sock = cpu_map__socket(sock_map, s); | 934 | id = aggr_map->map[s]; |
896 | list_for_each_entry(counter, &evsel_list->entries, node) { | 935 | list_for_each_entry(counter, &evsel_list->entries, node) { |
897 | val = ena = run = 0; | 936 | val = ena = run = 0; |
898 | nr = 0; | 937 | nr = 0; |
899 | for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { | 938 | for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { |
900 | s2 = cpu_map__get_socket(evsel_list->cpus, cpu); | 939 | s2 = aggr_get_id(evsel_list->cpus, cpu); |
901 | if (s2 != sock) | 940 | if (s2 != id) |
902 | continue; | 941 | continue; |
903 | val += counter->counts->cpu[cpu].val; | 942 | val += counter->counts->cpu[cpu].val; |
904 | ena += counter->counts->cpu[cpu].ena; | 943 | ena += counter->counts->cpu[cpu].ena; |
@@ -909,18 +948,15 @@ static void print_aggr_socket(char *prefix) | |||
909 | fprintf(output, "%s", prefix); | 948 | fprintf(output, "%s", prefix); |
910 | 949 | ||
911 | if (run == 0 || ena == 0) { | 950 | if (run == 0 || ena == 0) { |
912 | fprintf(output, "S%*d%s%*d%s%*s%s%*s", | 951 | aggr_printout(counter, cpu, nr); |
913 | csv_output ? 0 : -5, | 952 | |
914 | s, | 953 | fprintf(output, "%*s%s%*s", |
915 | csv_sep, | ||
916 | csv_output ? 0 : 4, | ||
917 | nr, | ||
918 | csv_sep, | ||
919 | csv_output ? 0 : 18, | 954 | csv_output ? 0 : 18, |
920 | counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, | 955 | counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, |
921 | csv_sep, | 956 | csv_sep, |
922 | csv_output ? 0 : -24, | 957 | csv_output ? 0 : -24, |
923 | perf_evsel__name(counter)); | 958 | perf_evsel__name(counter)); |
959 | |||
924 | if (counter->cgrp) | 960 | if (counter->cgrp) |
925 | fprintf(output, "%s%s", | 961 | fprintf(output, "%s%s", |
926 | csv_sep, counter->cgrp->name); | 962 | csv_sep, counter->cgrp->name); |
@@ -930,9 +966,9 @@ static void print_aggr_socket(char *prefix) | |||
930 | } | 966 | } |
931 | 967 | ||
932 | if (nsec_counter(counter)) | 968 | if (nsec_counter(counter)) |
933 | nsec_printout(sock, nr, counter, val); | 969 | nsec_printout(id, nr, counter, val); |
934 | else | 970 | else |
935 | abs_printout(sock, nr, counter, val); | 971 | abs_printout(id, nr, counter, val); |
936 | 972 | ||
937 | if (!csv_output) { | 973 | if (!csv_output) { |
938 | print_noise(counter, 1.0); | 974 | print_noise(counter, 1.0); |
@@ -1073,14 +1109,21 @@ static void print_stat(int argc, const char **argv) | |||
1073 | fprintf(output, ":\n\n"); | 1109 | fprintf(output, ":\n\n"); |
1074 | } | 1110 | } |
1075 | 1111 | ||
1076 | if (aggr_socket) | 1112 | switch (aggr_mode) { |
1077 | print_aggr_socket(NULL); | 1113 | case AGGR_CORE: |
1078 | else if (no_aggr) { | 1114 | case AGGR_SOCKET: |
1079 | list_for_each_entry(counter, &evsel_list->entries, node) | 1115 | print_aggr(NULL); |
1080 | print_counter(counter, NULL); | 1116 | break; |
1081 | } else { | 1117 | case AGGR_GLOBAL: |
1082 | list_for_each_entry(counter, &evsel_list->entries, node) | 1118 | list_for_each_entry(counter, &evsel_list->entries, node) |
1083 | print_counter_aggr(counter, NULL); | 1119 | print_counter_aggr(counter, NULL); |
1120 | break; | ||
1121 | case AGGR_NONE: | ||
1122 | list_for_each_entry(counter, &evsel_list->entries, node) | ||
1123 | print_counter(counter, NULL); | ||
1124 | break; | ||
1125 | default: | ||
1126 | break; | ||
1084 | } | 1127 | } |
1085 | 1128 | ||
1086 | if (!csv_output) { | 1129 | if (!csv_output) { |
@@ -1126,6 +1169,32 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, | |||
1126 | return 0; | 1169 | return 0; |
1127 | } | 1170 | } |
1128 | 1171 | ||
1172 | static int perf_stat_init_aggr_mode(void) | ||
1173 | { | ||
1174 | switch (aggr_mode) { | ||
1175 | case AGGR_SOCKET: | ||
1176 | if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { | ||
1177 | perror("cannot build socket map"); | ||
1178 | return -1; | ||
1179 | } | ||
1180 | aggr_get_id = cpu_map__get_socket; | ||
1181 | break; | ||
1182 | case AGGR_CORE: | ||
1183 | if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { | ||
1184 | perror("cannot build core map"); | ||
1185 | return -1; | ||
1186 | } | ||
1187 | aggr_get_id = cpu_map__get_core; | ||
1188 | break; | ||
1189 | case AGGR_NONE: | ||
1190 | case AGGR_GLOBAL: | ||
1191 | default: | ||
1192 | break; | ||
1193 | } | ||
1194 | return 0; | ||
1195 | } | ||
1196 | |||
1197 | |||
1129 | /* | 1198 | /* |
1130 | * Add default attributes, if there were no attributes specified or | 1199 | * Add default attributes, if there were no attributes specified or |
1131 | * if -d/--detailed, -d -d or -d -d -d is used: | 1200 | * if -d/--detailed, -d -d or -d -d -d is used: |
@@ -1296,7 +1365,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1296 | OPT_INCR('v', "verbose", &verbose, | 1365 | OPT_INCR('v', "verbose", &verbose, |
1297 | "be more verbose (show counter open errors, etc)"), | 1366 | "be more verbose (show counter open errors, etc)"), |
1298 | OPT_INTEGER('r', "repeat", &run_count, | 1367 | OPT_INTEGER('r', "repeat", &run_count, |
1299 | "repeat command and print average + stddev (max: 100)"), | 1368 | "repeat command and print average + stddev (max: 100, forever: 0)"), |
1300 | OPT_BOOLEAN('n', "null", &null_run, | 1369 | OPT_BOOLEAN('n', "null", &null_run, |
1301 | "null run - dont start any counters"), | 1370 | "null run - dont start any counters"), |
1302 | OPT_INCR('d', "detailed", &detailed_run, | 1371 | OPT_INCR('d', "detailed", &detailed_run, |
@@ -1308,7 +1377,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1308 | stat__set_big_num), | 1377 | stat__set_big_num), |
1309 | OPT_STRING('C', "cpu", &target.cpu_list, "cpu", | 1378 | OPT_STRING('C', "cpu", &target.cpu_list, "cpu", |
1310 | "list of cpus to monitor in system-wide"), | 1379 | "list of cpus to monitor in system-wide"), |
1311 | OPT_BOOLEAN('A', "no-aggr", &no_aggr, "disable CPU count aggregation"), | 1380 | OPT_SET_UINT('A', "no-aggr", &aggr_mode, |
1381 | "disable CPU count aggregation", AGGR_NONE), | ||
1312 | OPT_STRING('x', "field-separator", &csv_sep, "separator", | 1382 | OPT_STRING('x', "field-separator", &csv_sep, "separator", |
1313 | "print counts with custom separator"), | 1383 | "print counts with custom separator"), |
1314 | OPT_CALLBACK('G', "cgroup", &evsel_list, "name", | 1384 | OPT_CALLBACK('G', "cgroup", &evsel_list, "name", |
@@ -1323,20 +1393,22 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1323 | "command to run after to the measured command"), | 1393 | "command to run after to the measured command"), |
1324 | OPT_UINTEGER('I', "interval-print", &interval, | 1394 | OPT_UINTEGER('I', "interval-print", &interval, |
1325 | "print counts at regular interval in ms (>= 100)"), | 1395 | "print counts at regular interval in ms (>= 100)"), |
1326 | OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"), | 1396 | OPT_SET_UINT(0, "per-socket", &aggr_mode, |
1397 | "aggregate counts per processor socket", AGGR_SOCKET), | ||
1398 | OPT_SET_UINT(0, "per-core", &aggr_mode, | ||
1399 | "aggregate counts per physical processor core", AGGR_CORE), | ||
1327 | OPT_END() | 1400 | OPT_END() |
1328 | }; | 1401 | }; |
1329 | const char * const stat_usage[] = { | 1402 | const char * const stat_usage[] = { |
1330 | "perf stat [<options>] [<command>]", | 1403 | "perf stat [<options>] [<command>]", |
1331 | NULL | 1404 | NULL |
1332 | }; | 1405 | }; |
1333 | struct perf_evsel *pos; | ||
1334 | int status = -ENOMEM, run_idx; | 1406 | int status = -ENOMEM, run_idx; |
1335 | const char *mode; | 1407 | const char *mode; |
1336 | 1408 | ||
1337 | setlocale(LC_ALL, ""); | 1409 | setlocale(LC_ALL, ""); |
1338 | 1410 | ||
1339 | evsel_list = perf_evlist__new(NULL, NULL); | 1411 | evsel_list = perf_evlist__new(); |
1340 | if (evsel_list == NULL) | 1412 | if (evsel_list == NULL) |
1341 | return -ENOMEM; | 1413 | return -ENOMEM; |
1342 | 1414 | ||
@@ -1399,23 +1471,21 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1399 | 1471 | ||
1400 | if (!argc && !perf_target__has_task(&target)) | 1472 | if (!argc && !perf_target__has_task(&target)) |
1401 | usage_with_options(stat_usage, options); | 1473 | usage_with_options(stat_usage, options); |
1402 | if (run_count <= 0) | 1474 | if (run_count < 0) { |
1403 | usage_with_options(stat_usage, options); | 1475 | usage_with_options(stat_usage, options); |
1476 | } else if (run_count == 0) { | ||
1477 | forever = true; | ||
1478 | run_count = 1; | ||
1479 | } | ||
1404 | 1480 | ||
1405 | /* no_aggr, cgroup are for system-wide only */ | 1481 | /* no_aggr, cgroup are for system-wide only */ |
1406 | if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) { | 1482 | if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) |
1483 | && !perf_target__has_cpu(&target)) { | ||
1407 | fprintf(stderr, "both cgroup and no-aggregation " | 1484 | fprintf(stderr, "both cgroup and no-aggregation " |
1408 | "modes only available in system-wide mode\n"); | 1485 | "modes only available in system-wide mode\n"); |
1409 | 1486 | ||
1410 | usage_with_options(stat_usage, options); | 1487 | usage_with_options(stat_usage, options); |
1411 | } | 1488 | return -1; |
1412 | |||
1413 | if (aggr_socket) { | ||
1414 | if (!perf_target__has_cpu(&target)) { | ||
1415 | fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n"); | ||
1416 | usage_with_options(stat_usage, options); | ||
1417 | } | ||
1418 | no_aggr = true; | ||
1419 | } | 1489 | } |
1420 | 1490 | ||
1421 | if (add_default_attributes()) | 1491 | if (add_default_attributes()) |
@@ -1438,17 +1508,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1438 | return -1; | 1508 | return -1; |
1439 | } | 1509 | } |
1440 | 1510 | ||
1441 | list_for_each_entry(pos, &evsel_list->entries, node) { | 1511 | if (perf_evlist__alloc_stats(evsel_list, interval)) |
1442 | if (perf_evsel__alloc_stat_priv(pos) < 0 || | 1512 | goto out_free_maps; |
1443 | perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0) | 1513 | |
1444 | goto out_free_fd; | 1514 | if (perf_stat_init_aggr_mode()) |
1445 | } | 1515 | goto out; |
1446 | if (interval) { | ||
1447 | list_for_each_entry(pos, &evsel_list->entries, node) { | ||
1448 | if (perf_evsel__alloc_prev_raw_counts(pos) < 0) | ||
1449 | goto out_free_fd; | ||
1450 | } | ||
1451 | } | ||
1452 | 1516 | ||
1453 | /* | 1517 | /* |
1454 | * We dont want to block the signals - that would cause | 1518 | * We dont want to block the signals - that would cause |
@@ -1457,28 +1521,30 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1457 | * task, but being ignored by perf stat itself: | 1521 | * task, but being ignored by perf stat itself: |
1458 | */ | 1522 | */ |
1459 | atexit(sig_atexit); | 1523 | atexit(sig_atexit); |
1460 | signal(SIGINT, skip_signal); | 1524 | if (!forever) |
1525 | signal(SIGINT, skip_signal); | ||
1461 | signal(SIGCHLD, skip_signal); | 1526 | signal(SIGCHLD, skip_signal); |
1462 | signal(SIGALRM, skip_signal); | 1527 | signal(SIGALRM, skip_signal); |
1463 | signal(SIGABRT, skip_signal); | 1528 | signal(SIGABRT, skip_signal); |
1464 | 1529 | ||
1465 | status = 0; | 1530 | status = 0; |
1466 | for (run_idx = 0; run_idx < run_count; run_idx++) { | 1531 | for (run_idx = 0; forever || run_idx < run_count; run_idx++) { |
1467 | if (run_count != 1 && verbose) | 1532 | if (run_count != 1 && verbose) |
1468 | fprintf(output, "[ perf stat: executing run #%d ... ]\n", | 1533 | fprintf(output, "[ perf stat: executing run #%d ... ]\n", |
1469 | run_idx + 1); | 1534 | run_idx + 1); |
1470 | 1535 | ||
1471 | status = run_perf_stat(argc, argv); | 1536 | status = run_perf_stat(argc, argv); |
1537 | if (forever && status != -1) { | ||
1538 | print_stat(argc, argv); | ||
1539 | perf_stat__reset_stats(evsel_list); | ||
1540 | } | ||
1472 | } | 1541 | } |
1473 | 1542 | ||
1474 | if (status != -1 && !interval) | 1543 | if (!forever && status != -1 && !interval) |
1475 | print_stat(argc, argv); | 1544 | print_stat(argc, argv); |
1476 | out_free_fd: | 1545 | |
1477 | list_for_each_entry(pos, &evsel_list->entries, node) { | 1546 | perf_evlist__free_stats(evsel_list); |
1478 | perf_evsel__free_stat_priv(pos); | 1547 | out_free_maps: |
1479 | perf_evsel__free_counts(pos); | ||
1480 | perf_evsel__free_prev_raw_counts(pos); | ||
1481 | } | ||
1482 | perf_evlist__delete_maps(evsel_list); | 1548 | perf_evlist__delete_maps(evsel_list); |
1483 | out: | 1549 | out: |
1484 | perf_evlist__delete(evsel_list); | 1550 | perf_evlist__delete(evsel_list); |