diff options
author | Stephane Eranian <eranian@google.com> | 2013-02-06 09:46:02 -0500 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2013-02-06 16:09:27 -0500 |
commit | d7e7a451c13e784f497c054f1bd083d77be87498 (patch) | |
tree | 24aa7acacd6cd74da0b2ecaf4c9a40724fa2f107 /tools | |
parent | 5ac59a8a77e3faa1eaf9bfe82a61e9396b082c3d (diff) |
perf stat: Add per processor socket count aggregation
This patch adds per-processor socket count aggregation for system-wide
mode measurements. This is a useful mode to detect imbalance between
sockets.
To enable this mode, use --aggr-socket in addition
to -a (system-wide).
The output includes the socket number and the number of online
processors on that socket. This is useful to gauge the amount of
aggregation.
# ./perf stat -I 1000 -a --aggr-socket -e cycles sleep 2
# time socket cpus counts events
1.000097680 S0 4 5,788,785 cycles
2.000379943 S0 4 27,361,546 cycles
2.001167808 S0 4 818,275 cycles
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1360161962-9675-3-git-send-email-eranian@google.com
[ committer note: Added missing man page entry based on above comments ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 9 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 126 |
2 files changed, 123 insertions, 12 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 5289da3344e9..faf4f4feebcc 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -116,9 +116,16 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m | |||
116 | 116 | ||
117 | -I msecs:: | 117 | -I msecs:: |
118 | --interval-print msecs:: | 118 | --interval-print msecs:: |
119 | print count deltas every N milliseconds (minimum: 100ms) | 119 | Print count deltas every N milliseconds (minimum: 100ms) |
120 | example: perf stat -I 1000 -e cycles -a sleep 5 | 120 | example: perf stat -I 1000 -e cycles -a sleep 5 |
121 | 121 | ||
122 | --aggr-socket:: | ||
123 | Aggregate counts per processor socket for system-wide mode measurements. This | ||
124 | is a useful mode to detect imbalance between sockets. To enable this mode, | ||
125 | use --aggr-socket in addition to -a. (system-wide). The output includes the | ||
126 | socket number and the number of online processors on that socket. This is | ||
127 | useful to gauge the amount of aggregation. | ||
128 | |||
122 | EXAMPLES | 129 | EXAMPLES |
123 | -------- | 130 | -------- |
124 | 131 | ||
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 0368a1036ad6..99848761f573 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -68,6 +68,7 @@ | |||
68 | static void print_stat(int argc, const char **argv); | 68 | static void print_stat(int argc, const char **argv); |
69 | static void print_counter_aggr(struct perf_evsel *counter, char *prefix); | 69 | static void print_counter_aggr(struct perf_evsel *counter, char *prefix); |
70 | static void print_counter(struct perf_evsel *counter, char *prefix); | 70 | static void print_counter(struct perf_evsel *counter, char *prefix); |
71 | static void print_aggr_socket(char *prefix); | ||
71 | 72 | ||
72 | static struct perf_evlist *evsel_list; | 73 | static struct perf_evlist *evsel_list; |
73 | 74 | ||
@@ -79,6 +80,7 @@ static int run_count = 1; | |||
79 | static bool no_inherit = false; | 80 | static bool no_inherit = false; |
80 | static bool scale = true; | 81 | static bool scale = true; |
81 | static bool no_aggr = false; | 82 | static bool no_aggr = false; |
83 | static bool aggr_socket = false; | ||
82 | static pid_t child_pid = -1; | 84 | static pid_t child_pid = -1; |
83 | static bool null_run = false; | 85 | static bool null_run = false; |
84 | static int detailed_run = 0; | 86 | static int detailed_run = 0; |
@@ -93,6 +95,7 @@ static const char *post_cmd = NULL; | |||
93 | static bool sync_run = false; | 95 | static bool sync_run = false; |
94 | static unsigned int interval = 0; | 96 | static unsigned int interval = 0; |
95 | static struct timespec ref_time; | 97 | static struct timespec ref_time; |
98 | static struct cpu_map *sock_map; | ||
96 | 99 | ||
97 | static volatile int done = 0; | 100 | static volatile int done = 0; |
98 | 101 | ||
@@ -312,7 +315,9 @@ static void print_interval(void) | |||
312 | sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); | 315 | sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); |
313 | 316 | ||
314 | if (num_print_interval == 0 && !csv_output) { | 317 | if (num_print_interval == 0 && !csv_output) { |
315 | if (no_aggr) | 318 | if (aggr_socket) |
319 | fprintf(output, "# time socket cpus counts events\n"); | ||
320 | else if (no_aggr) | ||
316 | fprintf(output, "# time CPU counts events\n"); | 321 | fprintf(output, "# time CPU counts events\n"); |
317 | else | 322 | else |
318 | fprintf(output, "# time counts events\n"); | 323 | fprintf(output, "# time counts events\n"); |
@@ -321,7 +326,9 @@ static void print_interval(void) | |||
321 | if (++num_print_interval == 25) | 326 | if (++num_print_interval == 25) |
322 | num_print_interval = 0; | 327 | num_print_interval = 0; |
323 | 328 | ||
324 | if (no_aggr) { | 329 | if (aggr_socket) |
330 | print_aggr_socket(prefix); | ||
331 | else if (no_aggr) { | ||
325 | list_for_each_entry(counter, &evsel_list->entries, node) | 332 | list_for_each_entry(counter, &evsel_list->entries, node) |
326 | print_counter(counter, prefix); | 333 | print_counter(counter, prefix); |
327 | } else { | 334 | } else { |
@@ -349,6 +356,12 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv) | |||
349 | ts.tv_nsec = 0; | 356 | ts.tv_nsec = 0; |
350 | } | 357 | } |
351 | 358 | ||
359 | if (aggr_socket | ||
360 | && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) { | ||
361 | perror("cannot build socket map"); | ||
362 | return -1; | ||
363 | } | ||
364 | |||
352 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { | 365 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { |
353 | perror("failed to create pipes"); | 366 | perror("failed to create pipes"); |
354 | return -1; | 367 | return -1; |
@@ -529,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg) | |||
529 | print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); | 542 | print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); |
530 | } | 543 | } |
531 | 544 | ||
532 | static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) | 545 | static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) |
533 | { | 546 | { |
534 | double msecs = avg / 1e6; | 547 | double msecs = avg / 1e6; |
535 | char cpustr[16] = { '\0', }; | 548 | char cpustr[16] = { '\0', }; |
536 | const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; | 549 | const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; |
537 | 550 | ||
538 | if (no_aggr) | 551 | if (aggr_socket) |
552 | sprintf(cpustr, "S%*d%s%*d%s", | ||
553 | csv_output ? 0 : -5, | ||
554 | cpu, | ||
555 | csv_sep, | ||
556 | csv_output ? 0 : 4, | ||
557 | nr, | ||
558 | csv_sep); | ||
559 | else if (no_aggr) | ||
539 | sprintf(cpustr, "CPU%*d%s", | 560 | sprintf(cpustr, "CPU%*d%s", |
540 | csv_output ? 0 : -4, | 561 | csv_output ? 0 : -4, |
541 | perf_evsel__cpus(evsel)->map[cpu], csv_sep); | 562 | perf_evsel__cpus(evsel)->map[cpu], csv_sep); |
@@ -734,7 +755,7 @@ static void print_ll_cache_misses(int cpu, | |||
734 | fprintf(output, " of all LL-cache hits "); | 755 | fprintf(output, " of all LL-cache hits "); |
735 | } | 756 | } |
736 | 757 | ||
737 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | 758 | static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) |
738 | { | 759 | { |
739 | double total, ratio = 0.0; | 760 | double total, ratio = 0.0; |
740 | char cpustr[16] = { '\0', }; | 761 | char cpustr[16] = { '\0', }; |
@@ -747,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
747 | else | 768 | else |
748 | fmt = "%s%18.0f%s%-25s"; | 769 | fmt = "%s%18.0f%s%-25s"; |
749 | 770 | ||
750 | if (no_aggr) | 771 | if (aggr_socket) |
772 | sprintf(cpustr, "S%*d%s%*d%s", | ||
773 | csv_output ? 0 : -5, | ||
774 | cpu, | ||
775 | csv_sep, | ||
776 | csv_output ? 0 : 4, | ||
777 | nr, | ||
778 | csv_sep); | ||
779 | else if (no_aggr) | ||
751 | sprintf(cpustr, "CPU%*d%s", | 780 | sprintf(cpustr, "CPU%*d%s", |
752 | csv_output ? 0 : -4, | 781 | csv_output ? 0 : -4, |
753 | perf_evsel__cpus(evsel)->map[cpu], csv_sep); | 782 | perf_evsel__cpus(evsel)->map[cpu], csv_sep); |
@@ -853,6 +882,70 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
853 | } | 882 | } |
854 | } | 883 | } |
855 | 884 | ||
885 | static void print_aggr_socket(char *prefix) | ||
886 | { | ||
887 | struct perf_evsel *counter; | ||
888 | u64 ena, run, val; | ||
889 | int cpu, s, s2, sock, nr; | ||
890 | |||
891 | if (!sock_map) | ||
892 | return; | ||
893 | |||
894 | for (s = 0; s < sock_map->nr; s++) { | ||
895 | sock = cpu_map__socket(sock_map, s); | ||
896 | list_for_each_entry(counter, &evsel_list->entries, node) { | ||
897 | val = ena = run = 0; | ||
898 | nr = 0; | ||
899 | for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { | ||
900 | s2 = cpu_map__get_socket(evsel_list->cpus, cpu); | ||
901 | if (s2 != sock) | ||
902 | continue; | ||
903 | val += counter->counts->cpu[cpu].val; | ||
904 | ena += counter->counts->cpu[cpu].ena; | ||
905 | run += counter->counts->cpu[cpu].run; | ||
906 | nr++; | ||
907 | } | ||
908 | if (prefix) | ||
909 | fprintf(output, "%s", prefix); | ||
910 | |||
911 | if (run == 0 || ena == 0) { | ||
912 | fprintf(output, "S%*d%s%*d%s%*s%s%*s", | ||
913 | csv_output ? 0 : -5, | ||
914 | s, | ||
915 | csv_sep, | ||
916 | csv_output ? 0 : 4, | ||
917 | nr, | ||
918 | csv_sep, | ||
919 | csv_output ? 0 : 18, | ||
920 | counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, | ||
921 | csv_sep, | ||
922 | csv_output ? 0 : -24, | ||
923 | perf_evsel__name(counter)); | ||
924 | if (counter->cgrp) | ||
925 | fprintf(output, "%s%s", | ||
926 | csv_sep, counter->cgrp->name); | ||
927 | |||
928 | fputc('\n', output); | ||
929 | continue; | ||
930 | } | ||
931 | |||
932 | if (nsec_counter(counter)) | ||
933 | nsec_printout(sock, nr, counter, val); | ||
934 | else | ||
935 | abs_printout(sock, nr, counter, val); | ||
936 | |||
937 | if (!csv_output) { | ||
938 | print_noise(counter, 1.0); | ||
939 | |||
940 | if (run != ena) | ||
941 | fprintf(output, " (%.2f%%)", | ||
942 | 100.0 * run / ena); | ||
943 | } | ||
944 | fputc('\n', output); | ||
945 | } | ||
946 | } | ||
947 | } | ||
948 | |||
856 | /* | 949 | /* |
857 | * Print out the results of a single counter: | 950 | * Print out the results of a single counter: |
858 | * aggregated counts in system-wide mode | 951 | * aggregated counts in system-wide mode |
@@ -882,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) | |||
882 | } | 975 | } |
883 | 976 | ||
884 | if (nsec_counter(counter)) | 977 | if (nsec_counter(counter)) |
885 | nsec_printout(-1, counter, avg); | 978 | nsec_printout(-1, 0, counter, avg); |
886 | else | 979 | else |
887 | abs_printout(-1, counter, avg); | 980 | abs_printout(-1, 0, counter, avg); |
888 | 981 | ||
889 | print_noise(counter, avg); | 982 | print_noise(counter, avg); |
890 | 983 | ||
@@ -940,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix) | |||
940 | } | 1033 | } |
941 | 1034 | ||
942 | if (nsec_counter(counter)) | 1035 | if (nsec_counter(counter)) |
943 | nsec_printout(cpu, counter, val); | 1036 | nsec_printout(cpu, 0, counter, val); |
944 | else | 1037 | else |
945 | abs_printout(cpu, counter, val); | 1038 | abs_printout(cpu, 0, counter, val); |
946 | 1039 | ||
947 | if (!csv_output) { | 1040 | if (!csv_output) { |
948 | print_noise(counter, 1.0); | 1041 | print_noise(counter, 1.0); |
@@ -980,7 +1073,9 @@ static void print_stat(int argc, const char **argv) | |||
980 | fprintf(output, ":\n\n"); | 1073 | fprintf(output, ":\n\n"); |
981 | } | 1074 | } |
982 | 1075 | ||
983 | if (no_aggr) { | 1076 | if (aggr_socket) |
1077 | print_aggr_socket(NULL); | ||
1078 | else if (no_aggr) { | ||
984 | list_for_each_entry(counter, &evsel_list->entries, node) | 1079 | list_for_each_entry(counter, &evsel_list->entries, node) |
985 | print_counter(counter, NULL); | 1080 | print_counter(counter, NULL); |
986 | } else { | 1081 | } else { |
@@ -1228,6 +1323,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1228 | "command to run after to the measured command"), | 1323 | "command to run after to the measured command"), |
1229 | OPT_UINTEGER('I', "interval-print", &interval, | 1324 | OPT_UINTEGER('I', "interval-print", &interval, |
1230 | "print counts at regular interval in ms (>= 100)"), | 1325 | "print counts at regular interval in ms (>= 100)"), |
1326 | OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"), | ||
1231 | OPT_END() | 1327 | OPT_END() |
1232 | }; | 1328 | }; |
1233 | const char * const stat_usage[] = { | 1329 | const char * const stat_usage[] = { |
@@ -1314,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1314 | usage_with_options(stat_usage, options); | 1410 | usage_with_options(stat_usage, options); |
1315 | } | 1411 | } |
1316 | 1412 | ||
1413 | if (aggr_socket) { | ||
1414 | if (!perf_target__has_cpu(&target)) { | ||
1415 | fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n"); | ||
1416 | usage_with_options(stat_usage, options); | ||
1417 | } | ||
1418 | no_aggr = true; | ||
1419 | } | ||
1420 | |||
1317 | if (add_default_attributes()) | 1421 | if (add_default_attributes()) |
1318 | goto out; | 1422 | goto out; |
1319 | 1423 | ||