summaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
author Jiri Olsa <jolsa@kernel.org> 2015-10-25 10:51:18 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2015-10-27 14:08:07 -0400
commit 1e5a29318ba8506f52a8d727b5c6a53143f9882a (patch)
tree 30dc329ee8052a0525bf69947035a34bd340fb81 /tools/perf/builtin-stat.c
parent 2322f573f8131da9c6d1fab01fe0a0c2c23aa549 (diff)
perf stat: Cache aggregated map entries in extra cpumap
Currently, any time we need the socket or core id for a given cpu, we read the sysfs topology file. Add a cpus_aggr_map cpu_map to cache those entries. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Kan Liang <kan.liang@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1445784728-21732-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c59
1 files changed, 55 insertions, 4 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 91e793a76929..2f438f76cceb 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -100,6 +100,8 @@ static struct target target = {
100 .uid = UINT_MAX, 100 .uid = UINT_MAX,
101}; 101};
102 102
103typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
104
103static int run_count = 1; 105static int run_count = 1;
104static bool no_inherit = false; 106static bool no_inherit = false;
105static volatile pid_t child_pid = -1; 107static volatile pid_t child_pid = -1;
@@ -119,7 +121,7 @@ static unsigned int unit_width = 4; /* strlen("unit") */
119static bool forever = false; 121static bool forever = false;
120static struct timespec ref_time; 122static struct timespec ref_time;
121static struct cpu_map *aggr_map; 123static struct cpu_map *aggr_map;
122static int (*aggr_get_id)(struct cpu_map *m, int cpu); 124static aggr_get_id_t aggr_get_id;
123 125
124static volatile int done = 0; 126static volatile int done = 0;
125 127
@@ -954,22 +956,63 @@ static int perf_stat__get_core(struct cpu_map *map, int cpu)
954 return cpu_map__get_core(map, cpu, NULL); 956 return cpu_map__get_core(map, cpu, NULL);
955} 957}
956 958
959static int cpu_map__get_max(struct cpu_map *map)
960{
961 int i, max = -1;
962
963 for (i = 0; i < map->nr; i++) {
964 if (map->map[i] > max)
965 max = map->map[i];
966 }
967
968 return max;
969}
970
971static struct cpu_map *cpus_aggr_map;
972
973static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
974{
975 int cpu;
976
977 if (idx >= map->nr)
978 return -1;
979
980 cpu = map->map[idx];
981
982 if (cpus_aggr_map->map[cpu] == -1)
983 cpus_aggr_map->map[cpu] = get_id(map, idx);
984
985 return cpus_aggr_map->map[cpu];
986}
987
988static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
989{
990 return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
991}
992
993static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
994{
995 return perf_stat__get_aggr(perf_stat__get_core, map, idx);
996}
997
957static int perf_stat_init_aggr_mode(void) 998static int perf_stat_init_aggr_mode(void)
958{ 999{
1000 int nr;
1001
959 switch (stat_config.aggr_mode) { 1002 switch (stat_config.aggr_mode) {
960 case AGGR_SOCKET: 1003 case AGGR_SOCKET:
961 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1004 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
962 perror("cannot build socket map"); 1005 perror("cannot build socket map");
963 return -1; 1006 return -1;
964 } 1007 }
965 aggr_get_id = perf_stat__get_socket; 1008 aggr_get_id = perf_stat__get_socket_cached;
966 break; 1009 break;
967 case AGGR_CORE: 1010 case AGGR_CORE:
968 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1011 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
969 perror("cannot build core map"); 1012 perror("cannot build core map");
970 return -1; 1013 return -1;
971 } 1014 }
972 aggr_get_id = perf_stat__get_core; 1015 aggr_get_id = perf_stat__get_core_cached;
973 break; 1016 break;
974 case AGGR_NONE: 1017 case AGGR_NONE:
975 case AGGR_GLOBAL: 1018 case AGGR_GLOBAL:
@@ -978,7 +1021,15 @@ static int perf_stat_init_aggr_mode(void)
978 default: 1021 default:
979 break; 1022 break;
980 } 1023 }
981 return 0; 1024
1025 /*
1026 * The evsel_list->cpus is the base we operate on,
1027 * taking the highest cpu number to be the size of
1028 * the aggregation translate cpumap.
1029 */
1030 nr = cpu_map__get_max(evsel_list->cpus);
1031 cpus_aggr_map = cpu_map__empty_new(nr + 1);
1032 return cpus_aggr_map ? 0 : -ENOMEM;
982} 1033}
983 1034
984/* 1035/*