aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorStephane Eranian <eranian@google.com>2013-02-06 09:46:02 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2013-02-06 16:09:27 -0500
commitd7e7a451c13e784f497c054f1bd083d77be87498 (patch)
tree24aa7acacd6cd74da0b2ecaf4c9a40724fa2f107 /tools
parent5ac59a8a77e3faa1eaf9bfe82a61e9396b082c3d (diff)
perf stat: Add per processor socket count aggregation
This patch adds per-processor socket count aggregation for system-wide mode measurements. This is a useful mode to detect imbalance between sockets.

To enable this mode, use --aggr-socket in addition to -a (system-wide).

The output includes the socket number and the number of online processors on that socket. This is useful to gauge the amount of aggregation.

  # ./perf stat -I 1000 -a --aggr-socket -e cycles sleep 2
  #           time socket cpus             counts events
       1.000097680 S0        4          5,788,785 cycles
       2.000379943 S0        4         27,361,546 cycles
       2.001167808 S0        4            818,275 cycles

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1360161962-9675-3-git-send-email-eranian@google.com
[ committer note: Added missing man page entry based on above comments ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Documentation/perf-stat.txt9
-rw-r--r--tools/perf/builtin-stat.c126
2 files changed, 123 insertions, 12 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 5289da3344e9..faf4f4feebcc 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -116,9 +116,16 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m
116 116
117-I msecs:: 117-I msecs::
118--interval-print msecs:: 118--interval-print msecs::
119 print count deltas every N milliseconds (minimum: 100ms) 119 Print count deltas every N milliseconds (minimum: 100ms)
120 example: perf stat -I 1000 -e cycles -a sleep 5 120 example: perf stat -I 1000 -e cycles -a sleep 5
121 121
122--aggr-socket::
123Aggregate counts per processor socket for system-wide mode measurements. This
124is a useful mode to detect imbalance between sockets. To enable this mode,
125use --aggr-socket in addition to -a (system-wide). The output includes the
126socket number and the number of online processors on that socket. This is
127useful to gauge the amount of aggregation.
128
122EXAMPLES 129EXAMPLES
123-------- 130--------
124 131
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 0368a1036ad6..99848761f573 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -68,6 +68,7 @@
68static void print_stat(int argc, const char **argv); 68static void print_stat(int argc, const char **argv);
69static void print_counter_aggr(struct perf_evsel *counter, char *prefix); 69static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70static void print_counter(struct perf_evsel *counter, char *prefix); 70static void print_counter(struct perf_evsel *counter, char *prefix);
71static void print_aggr_socket(char *prefix);
71 72
72static struct perf_evlist *evsel_list; 73static struct perf_evlist *evsel_list;
73 74
@@ -79,6 +80,7 @@ static int run_count = 1;
79static bool no_inherit = false; 80static bool no_inherit = false;
80static bool scale = true; 81static bool scale = true;
81static bool no_aggr = false; 82static bool no_aggr = false;
83static bool aggr_socket = false;
82static pid_t child_pid = -1; 84static pid_t child_pid = -1;
83static bool null_run = false; 85static bool null_run = false;
84static int detailed_run = 0; 86static int detailed_run = 0;
@@ -93,6 +95,7 @@ static const char *post_cmd = NULL;
93static bool sync_run = false; 95static bool sync_run = false;
94static unsigned int interval = 0; 96static unsigned int interval = 0;
95static struct timespec ref_time; 97static struct timespec ref_time;
98static struct cpu_map *sock_map;
96 99
97static volatile int done = 0; 100static volatile int done = 0;
98 101
@@ -312,7 +315,9 @@ static void print_interval(void)
312 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); 315 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
313 316
314 if (num_print_interval == 0 && !csv_output) { 317 if (num_print_interval == 0 && !csv_output) {
315 if (no_aggr) 318 if (aggr_socket)
319 fprintf(output, "# time socket cpus counts events\n");
320 else if (no_aggr)
316 fprintf(output, "# time CPU counts events\n"); 321 fprintf(output, "# time CPU counts events\n");
317 else 322 else
318 fprintf(output, "# time counts events\n"); 323 fprintf(output, "# time counts events\n");
@@ -321,7 +326,9 @@ static void print_interval(void)
321 if (++num_print_interval == 25) 326 if (++num_print_interval == 25)
322 num_print_interval = 0; 327 num_print_interval = 0;
323 328
324 if (no_aggr) { 329 if (aggr_socket)
330 print_aggr_socket(prefix);
331 else if (no_aggr) {
325 list_for_each_entry(counter, &evsel_list->entries, node) 332 list_for_each_entry(counter, &evsel_list->entries, node)
326 print_counter(counter, prefix); 333 print_counter(counter, prefix);
327 } else { 334 } else {
@@ -349,6 +356,12 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
349 ts.tv_nsec = 0; 356 ts.tv_nsec = 0;
350 } 357 }
351 358
359 if (aggr_socket
360 && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
361 perror("cannot build socket map");
362 return -1;
363 }
364
352 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { 365 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
353 perror("failed to create pipes"); 366 perror("failed to create pipes");
354 return -1; 367 return -1;
@@ -529,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg)
529 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 542 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
530} 543}
531 544
532static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) 545static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
533{ 546{
534 double msecs = avg / 1e6; 547 double msecs = avg / 1e6;
535 char cpustr[16] = { '\0', }; 548 char cpustr[16] = { '\0', };
536 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; 549 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
537 550
538 if (no_aggr) 551 if (aggr_socket)
552 sprintf(cpustr, "S%*d%s%*d%s",
553 csv_output ? 0 : -5,
554 cpu,
555 csv_sep,
556 csv_output ? 0 : 4,
557 nr,
558 csv_sep);
559 else if (no_aggr)
539 sprintf(cpustr, "CPU%*d%s", 560 sprintf(cpustr, "CPU%*d%s",
540 csv_output ? 0 : -4, 561 csv_output ? 0 : -4,
541 perf_evsel__cpus(evsel)->map[cpu], csv_sep); 562 perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -734,7 +755,7 @@ static void print_ll_cache_misses(int cpu,
734 fprintf(output, " of all LL-cache hits "); 755 fprintf(output, " of all LL-cache hits ");
735} 756}
736 757
737static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) 758static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
738{ 759{
739 double total, ratio = 0.0; 760 double total, ratio = 0.0;
740 char cpustr[16] = { '\0', }; 761 char cpustr[16] = { '\0', };
@@ -747,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
747 else 768 else
748 fmt = "%s%18.0f%s%-25s"; 769 fmt = "%s%18.0f%s%-25s";
749 770
750 if (no_aggr) 771 if (aggr_socket)
772 sprintf(cpustr, "S%*d%s%*d%s",
773 csv_output ? 0 : -5,
774 cpu,
775 csv_sep,
776 csv_output ? 0 : 4,
777 nr,
778 csv_sep);
779 else if (no_aggr)
751 sprintf(cpustr, "CPU%*d%s", 780 sprintf(cpustr, "CPU%*d%s",
752 csv_output ? 0 : -4, 781 csv_output ? 0 : -4,
753 perf_evsel__cpus(evsel)->map[cpu], csv_sep); 782 perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -853,6 +882,70 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
853 } 882 }
854} 883}
855 884
885static void print_aggr_socket(char *prefix)
886{
887 struct perf_evsel *counter;
888 u64 ena, run, val;
889 int cpu, s, s2, sock, nr;
890
891 if (!sock_map)
892 return;
893
894 for (s = 0; s < sock_map->nr; s++) {
895 sock = cpu_map__socket(sock_map, s);
896 list_for_each_entry(counter, &evsel_list->entries, node) {
897 val = ena = run = 0;
898 nr = 0;
899 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
900 s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
901 if (s2 != sock)
902 continue;
903 val += counter->counts->cpu[cpu].val;
904 ena += counter->counts->cpu[cpu].ena;
905 run += counter->counts->cpu[cpu].run;
906 nr++;
907 }
908 if (prefix)
909 fprintf(output, "%s", prefix);
910
911 if (run == 0 || ena == 0) {
912 fprintf(output, "S%*d%s%*d%s%*s%s%*s",
913 csv_output ? 0 : -5,
914 s,
915 csv_sep,
916 csv_output ? 0 : 4,
917 nr,
918 csv_sep,
919 csv_output ? 0 : 18,
920 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
921 csv_sep,
922 csv_output ? 0 : -24,
923 perf_evsel__name(counter));
924 if (counter->cgrp)
925 fprintf(output, "%s%s",
926 csv_sep, counter->cgrp->name);
927
928 fputc('\n', output);
929 continue;
930 }
931
932 if (nsec_counter(counter))
933 nsec_printout(sock, nr, counter, val);
934 else
935 abs_printout(sock, nr, counter, val);
936
937 if (!csv_output) {
938 print_noise(counter, 1.0);
939
940 if (run != ena)
941 fprintf(output, " (%.2f%%)",
942 100.0 * run / ena);
943 }
944 fputc('\n', output);
945 }
946 }
947}
948
856/* 949/*
857 * Print out the results of a single counter: 950 * Print out the results of a single counter:
858 * aggregated counts in system-wide mode 951 * aggregated counts in system-wide mode
@@ -882,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
882 } 975 }
883 976
884 if (nsec_counter(counter)) 977 if (nsec_counter(counter))
885 nsec_printout(-1, counter, avg); 978 nsec_printout(-1, 0, counter, avg);
886 else 979 else
887 abs_printout(-1, counter, avg); 980 abs_printout(-1, 0, counter, avg);
888 981
889 print_noise(counter, avg); 982 print_noise(counter, avg);
890 983
@@ -940,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
940 } 1033 }
941 1034
942 if (nsec_counter(counter)) 1035 if (nsec_counter(counter))
943 nsec_printout(cpu, counter, val); 1036 nsec_printout(cpu, 0, counter, val);
944 else 1037 else
945 abs_printout(cpu, counter, val); 1038 abs_printout(cpu, 0, counter, val);
946 1039
947 if (!csv_output) { 1040 if (!csv_output) {
948 print_noise(counter, 1.0); 1041 print_noise(counter, 1.0);
@@ -980,7 +1073,9 @@ static void print_stat(int argc, const char **argv)
980 fprintf(output, ":\n\n"); 1073 fprintf(output, ":\n\n");
981 } 1074 }
982 1075
983 if (no_aggr) { 1076 if (aggr_socket)
1077 print_aggr_socket(NULL);
1078 else if (no_aggr) {
984 list_for_each_entry(counter, &evsel_list->entries, node) 1079 list_for_each_entry(counter, &evsel_list->entries, node)
985 print_counter(counter, NULL); 1080 print_counter(counter, NULL);
986 } else { 1081 } else {
@@ -1228,6 +1323,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1228 "command to run after to the measured command"), 1323 "command to run after to the measured command"),
1229 OPT_UINTEGER('I', "interval-print", &interval, 1324 OPT_UINTEGER('I', "interval-print", &interval,
1230 "print counts at regular interval in ms (>= 100)"), 1325 "print counts at regular interval in ms (>= 100)"),
1326 OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
1231 OPT_END() 1327 OPT_END()
1232 }; 1328 };
1233 const char * const stat_usage[] = { 1329 const char * const stat_usage[] = {
@@ -1314,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1314 usage_with_options(stat_usage, options); 1410 usage_with_options(stat_usage, options);
1315 } 1411 }
1316 1412
1413 if (aggr_socket) {
1414 if (!perf_target__has_cpu(&target)) {
1415 fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
1416 usage_with_options(stat_usage, options);
1417 }
1418 no_aggr = true;
1419 }
1420
1317 if (add_default_attributes()) 1421 if (add_default_attributes())
1318 goto out; 1422 goto out;
1319 1423