summary | refs | log | tree | commit | diff | stats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
author: Kan Liang <kan.liang@linux.intel.com> 2019-06-04 18:50:42 -0400
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2019-06-10 15:19:59 -0400
commit: db5742b6849ed7f01d764e6755b8ab2b422f29b2 (patch)
tree: 7b302de744f0c1c6b6af182e99754c1a34832153 /tools/perf/builtin-stat.c
parent: acae8b36cded0ee62038dedd0a44d54d5d673a96 (diff)
perf stat: Support per-die aggregation
It is useful to aggregate counts per die. E.g. Uncore becomes die-scope on Xeon Cascade Lake-AP.

Introduce a new option "--per-die" to support per-die aggregation.

The global id for each core has been changed to socket + die id + core id. The global id for each die is socket + die id.

Add die information for per-core aggregation. The output of per-core aggregation will be changed from "S0-C0" to "S0-D0-C0". Any scripts which rely on the output format of per-core aggregation will probably be broken.

For 'perf stat record/report', there is no die information when processing the old perf.data. The per-die result will be the same as per-socket.

Committer notes:

Renamed 'die' variable to 'die_id' to fix the build in some systems:

  CC /tmp/build/perf/builtin-script.o
  cc1: warnings being treated as errors
  builtin-stat.c: In function 'perf_env__get_die':
  builtin-stat.c:963: error: declaration of 'die' shadows a global declaration
  util/util.h:19: error: shadowed declaration is here
  mv: cannot stat `/tmp/build/perf/.builtin-stat.o.tmp': No such file or directory

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/n/tip-bsnhx7vgsuu6ei307mw60mbj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- tools/perf/builtin-stat.c 87
1 files changed, 81 insertions, 6 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 24b8e690fb69..272df8426f0a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -777,6 +777,8 @@ static struct option stat_options[] = {
777 "stop workload and print counts after a timeout period in ms (>= 10ms)"), 777 "stop workload and print counts after a timeout period in ms (>= 10ms)"),
778 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 778 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
779 "aggregate counts per processor socket", AGGR_SOCKET), 779 "aggregate counts per processor socket", AGGR_SOCKET),
780 OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
781 "aggregate counts per processor die", AGGR_DIE),
780 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 782 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
781 "aggregate counts per physical processor core", AGGR_CORE), 783 "aggregate counts per physical processor core", AGGR_CORE),
782 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 784 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
@@ -801,6 +803,12 @@ static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
801 return cpu_map__get_socket(map, cpu, NULL); 803 return cpu_map__get_socket(map, cpu, NULL);
802} 804}
803 805
806static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
807 struct cpu_map *map, int cpu)
808{
809 return cpu_map__get_die(map, cpu, NULL);
810}
811
804static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, 812static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
805 struct cpu_map *map, int cpu) 813 struct cpu_map *map, int cpu)
806{ 814{
@@ -841,6 +849,12 @@ static int perf_stat__get_socket_cached(struct perf_stat_config *config,
841 return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); 849 return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
842} 850}
843 851
852static int perf_stat__get_die_cached(struct perf_stat_config *config,
853 struct cpu_map *map, int idx)
854{
855 return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
856}
857
844static int perf_stat__get_core_cached(struct perf_stat_config *config, 858static int perf_stat__get_core_cached(struct perf_stat_config *config,
845 struct cpu_map *map, int idx) 859 struct cpu_map *map, int idx)
846{ 860{
@@ -871,6 +885,13 @@ static int perf_stat_init_aggr_mode(void)
871 } 885 }
872 stat_config.aggr_get_id = perf_stat__get_socket_cached; 886 stat_config.aggr_get_id = perf_stat__get_socket_cached;
873 break; 887 break;
888 case AGGR_DIE:
889 if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) {
890 perror("cannot build die map");
891 return -1;
892 }
893 stat_config.aggr_get_id = perf_stat__get_die_cached;
894 break;
874 case AGGR_CORE: 895 case AGGR_CORE:
875 if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { 896 if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) {
876 perror("cannot build core map"); 897 perror("cannot build core map");
@@ -936,21 +957,55 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
936 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 957 return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
937} 958}
938 959
960static int perf_env__get_die(struct cpu_map *map, int idx, void *data)
961{
962 struct perf_env *env = data;
963 int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
964
965 if (cpu != -1) {
966 /*
967 * Encode socket in bit range 15:8
968 * die_id is relative to socket,
969 * we need a global id. So we combine
970 * socket + die id
971 */
972 if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
973 return -1;
974
975 if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
976 return -1;
977
978 die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
979 }
980
981 return die_id;
982}
983
939static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 984static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
940{ 985{
941 struct perf_env *env = data; 986 struct perf_env *env = data;
942 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 987 int core = -1, cpu = perf_env__get_cpu(env, map, idx);
943 988
944 if (cpu != -1) { 989 if (cpu != -1) {
945 int socket_id = env->cpu[cpu].socket_id;
946
947 /* 990 /*
948 * Encode socket in upper 16 bits 991 * Encode socket in bit range 31:24
949 * core_id is relative to socket, and 992 * encode die id in bit range 23:16
993 * core_id is relative to socket and die,
950 * we need a global id. So we combine 994 * we need a global id. So we combine
951 * socket + core id. 995 * socket + die id + core id
952 */ 996 */
953 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 997 if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
998 return -1;
999
1000 if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
1001 return -1;
1002
1003 if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
1004 return -1;
1005
1006 core = (env->cpu[cpu].socket_id << 24) |
1007 (env->cpu[cpu].die_id << 16) |
1008 (env->cpu[cpu].core_id & 0xffff);
954 } 1009 }
955 1010
956 return core; 1011 return core;
@@ -962,6 +1017,12 @@ static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus
962 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1017 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
963} 1018}
964 1019
1020static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus,
1021 struct cpu_map **diep)
1022{
1023 return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
1024}
1025
965static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1026static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
966 struct cpu_map **corep) 1027 struct cpu_map **corep)
967{ 1028{
@@ -973,6 +1034,11 @@ static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_un
973{ 1034{
974 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1035 return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
975} 1036}
1037static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
1038 struct cpu_map *map, int idx)
1039{
1040 return perf_env__get_die(map, idx, &perf_stat.session->header.env);
1041}
976 1042
977static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, 1043static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
978 struct cpu_map *map, int idx) 1044 struct cpu_map *map, int idx)
@@ -992,6 +1058,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
992 } 1058 }
993 stat_config.aggr_get_id = perf_stat__get_socket_file; 1059 stat_config.aggr_get_id = perf_stat__get_socket_file;
994 break; 1060 break;
1061 case AGGR_DIE:
1062 if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
1063 perror("cannot build die map");
1064 return -1;
1065 }
1066 stat_config.aggr_get_id = perf_stat__get_die_file;
1067 break;
995 case AGGR_CORE: 1068 case AGGR_CORE:
996 if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { 1069 if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
997 perror("cannot build core map"); 1070 perror("cannot build core map");
@@ -1542,6 +1615,8 @@ static int __cmd_report(int argc, const char **argv)
1542 OPT_STRING('i', "input", &input_name, "file", "input file name"), 1615 OPT_STRING('i', "input", &input_name, "file", "input file name"),
1543 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 1616 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
1544 "aggregate counts per processor socket", AGGR_SOCKET), 1617 "aggregate counts per processor socket", AGGR_SOCKET),
1618 OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
1619 "aggregate counts per processor die", AGGR_DIE),
1545 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 1620 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
1546 "aggregate counts per physical processor core", AGGR_CORE), 1621 "aggregate counts per physical processor core", AGGR_CORE),
1547 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 1622 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,