about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andi Kleen <ak@linux.intel.com> 2016-02-29 17:36:22 -0500
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2016-03-03 09:10:36 -0500
commit: 44d49a6002595ccb95712e86ad2857cd55207602 (patch)
tree: a908486dce7aba6f12062614ca5699ce401ecc11
parent: 92a61f6412d3a09d6462252a522fa79c9290f405 (diff)
perf stat: Support metrics in --per-core/socket mode
Enable metrics printing in --per-core / --per-socket mode. We need to save
the shadow metrics in a unique place. Always use the first CPU in the
aggregation. Then use the same CPU to retrieve the shadow value later.

Example output:

  % perf stat --per-core -a ./BC1s

   Performance counter stats for 'system wide':

  S0-C0   2     2966.020381   task-clock (msec)   #    2.004 CPUs utilized     (100.00%)
  S0-C0   2              49   context-switches    #    0.017 K/sec             (100.00%)
  S0-C0   2               4   cpu-migrations      #    0.001 K/sec             (100.00%)
  S0-C0   2             467   page-faults         #    0.157 K/sec
  S0-C0   2   4,599,061,773   cycles              #    1.551 GHz               (100.00%)
  S0-C0   2   9,755,886,883   instructions        #    2.12 insn per cycle     (100.00%)
  S0-C0   2   1,906,272,125   branches            #  642.704 M/sec             (100.00%)
  S0-C0   2      81,180,867   branch-misses       #    4.26% of all branches
  S0-C1   2     2965.995373   task-clock (msec)   #    2.003 CPUs utilized     (100.00%)
  S0-C1   2              62   context-switches    #    0.021 K/sec             (100.00%)
  S0-C1   2               8   cpu-migrations      #    0.003 K/sec             (100.00%)
  S0-C1   2             281   page-faults         #    0.095 K/sec
  S0-C1   2       6,347,290   cycles              #    0.002 GHz               (100.00%)
  S0-C1   2       4,654,156   instructions        #    0.73 insn per cycle     (100.00%)
  S0-C1   2         947,121   branches            #    0.319 M/sec             (100.00%)
  S0-C1   2          37,322   branch-misses       #    3.94% of all branches

       1.480409747 seconds time elapsed

v2: Rebase to older patches
v3: Document shadow cpus. Fix aggr_get_id argument. Fix -A shadows (Jiri)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/1456785386-19481-4-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--  tools/perf/builtin-stat.c      | 64
-rw-r--r--  tools/perf/util/stat-shadow.c  |  7
2 files changed, 63 insertions, 8 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2ffb8221917a..9b5089c5dffe 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -740,6 +740,8 @@ struct outstate {
740 bool newline; 740 bool newline;
741 const char *prefix; 741 const char *prefix;
742 int nfields; 742 int nfields;
743 int id, nr;
744 struct perf_evsel *evsel;
743}; 745};
744 746
745#define METRIC_LEN 35 747#define METRIC_LEN 35
@@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os)
755{ 757{
756 fputc('\n', os->fh); 758 fputc('\n', os->fh);
757 fputs(os->prefix, os->fh); 759 fputs(os->prefix, os->fh);
760 aggr_printout(os->evsel, os->id, os->nr);
758 if (stat_config.aggr_mode == AGGR_NONE) 761 if (stat_config.aggr_mode == AGGR_NONE)
759 fprintf(os->fh, " "); 762 fprintf(os->fh, " ");
760 if (stat_config.aggr_mode == AGGR_CORE)
761 fprintf(os->fh, " ");
762 if (stat_config.aggr_mode == AGGR_SOCKET)
763 fprintf(os->fh, " ");
764 fprintf(os->fh, " "); 763 fprintf(os->fh, " ");
765} 764}
766 765
@@ -798,6 +797,7 @@ static void new_line_csv(void *ctx)
798 fputc('\n', os->fh); 797 fputc('\n', os->fh);
799 if (os->prefix) 798 if (os->prefix)
800 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 799 fprintf(os->fh, "%s%s", os->prefix, csv_sep);
800 aggr_printout(os->evsel, os->id, os->nr);
801 for (i = 0; i < os->nfields; i++) 801 for (i = 0; i < os->nfields; i++)
802 fputs(csv_sep, os->fh); 802 fputs(csv_sep, os->fh);
803} 803}
@@ -855,6 +855,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
855 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 855 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
856} 856}
857 857
858static int first_shadow_cpu(struct perf_evsel *evsel, int id)
859{
860 int i;
861
862 if (!aggr_get_id)
863 return 0;
864
865 if (stat_config.aggr_mode == AGGR_NONE)
866 return id;
867
868 if (stat_config.aggr_mode == AGGR_GLOBAL)
869 return 0;
870
871 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
872 int cpu2 = perf_evsel__cpus(evsel)->map[i];
873
874 if (aggr_get_id(evsel_list->cpus, cpu2) == id)
875 return cpu2;
876 }
877 return 0;
878}
879
858static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 880static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
859{ 881{
860 FILE *output = stat_config.output; 882 FILE *output = stat_config.output;
@@ -891,7 +913,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
891 struct perf_stat_output_ctx out; 913 struct perf_stat_output_ctx out;
892 struct outstate os = { 914 struct outstate os = {
893 .fh = stat_config.output, 915 .fh = stat_config.output,
894 .prefix = prefix ? prefix : "" 916 .prefix = prefix ? prefix : "",
917 .id = id,
918 .nr = nr,
919 .evsel = counter,
895 }; 920 };
896 print_metric_t pm = print_metric_std; 921 print_metric_t pm = print_metric_std;
897 void (*nl)(void *); 922 void (*nl)(void *);
@@ -958,16 +983,37 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
958 } 983 }
959 984
960 perf_stat__print_shadow_stats(counter, uval, 985 perf_stat__print_shadow_stats(counter, uval,
961 stat_config.aggr_mode == AGGR_GLOBAL ? 0 : 986 first_shadow_cpu(counter, id),
962 cpu_map__id_to_cpu(id),
963 &out); 987 &out);
964
965 if (!csv_output) { 988 if (!csv_output) {
966 print_noise(counter, noise); 989 print_noise(counter, noise);
967 print_running(run, ena); 990 print_running(run, ena);
968 } 991 }
969} 992}
970 993
994static void aggr_update_shadow(void)
995{
996 int cpu, s2, id, s;
997 u64 val;
998 struct perf_evsel *counter;
999
1000 for (s = 0; s < aggr_map->nr; s++) {
1001 id = aggr_map->map[s];
1002 evlist__for_each(evsel_list, counter) {
1003 val = 0;
1004 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1005 s2 = aggr_get_id(evsel_list->cpus, cpu);
1006 if (s2 != id)
1007 continue;
1008 val += perf_counts(counter->counts, cpu, 0)->val;
1009 }
1010 val = val * counter->scale;
1011 perf_stat__update_shadow_stats(counter, &val,
1012 first_shadow_cpu(counter, id));
1013 }
1014 }
1015}
1016
971static void print_aggr(char *prefix) 1017static void print_aggr(char *prefix)
972{ 1018{
973 FILE *output = stat_config.output; 1019 FILE *output = stat_config.output;
@@ -979,6 +1025,8 @@ static void print_aggr(char *prefix)
979 if (!(aggr_map || aggr_get_id)) 1025 if (!(aggr_map || aggr_get_id))
980 return; 1026 return;
981 1027
1028 aggr_update_shadow();
1029
982 for (s = 0; s < aggr_map->nr; s++) { 1030 for (s = 0; s < aggr_map->nr; s++) {
983 id = aggr_map->map[s]; 1031 id = aggr_map->map[s];
984 evlist__for_each(evsel_list, counter) { 1032 evlist__for_each(evsel_list, counter) {
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 367e220e93d5..5e2d2e34e1bc 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -14,6 +14,13 @@ enum {
14 14
15#define NUM_CTX CTX_BIT_MAX 15#define NUM_CTX CTX_BIT_MAX
16 16
17/*
18 * AGGR_GLOBAL: Use CPU 0
19 * AGGR_SOCKET: Use first CPU of socket
20 * AGGR_CORE: Use first CPU of core
21 * AGGR_NONE: Use matching CPU
22 * AGGR_THREAD: Not supported?
23 */
17static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 24static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
18static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; 25static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
19static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; 26static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];