diff options
author | Kan Liang <kan.liang@linux.intel.com> | 2019-06-04 18:50:42 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2019-06-10 15:19:59 -0400 |
commit | db5742b6849ed7f01d764e6755b8ab2b422f29b2 (patch) | |
tree | 7b302de744f0c1c6b6af182e99754c1a34832153 /tools/perf/builtin-stat.c | |
parent | acae8b36cded0ee62038dedd0a44d54d5d673a96 (diff) |
perf stat: Support per-die aggregation
It is useful to aggregate counts per die. E.g. Uncore becomes die-scope
on Xeon Cascade Lake-AP.
Introduce a new option "--per-die" to support per-die aggregation.
The global id for each core has been changed to socket + die id + core
id. The global id for each die is socket + die id.
Add die information for per-core aggregation. The output of per-core
aggregation will be changed from "S0-C0" to "S0-D0-C0". Any scripts
which rely on the output format of per-core aggregation will probably
break.
For 'perf stat record/report', there is no die information when
processing an old perf.data file, so the per-die result will be the
same as the per-socket result.
Committer notes:
Renamed 'die' variable to 'die_id' to fix the build on some systems:
CC /tmp/build/perf/builtin-script.o
cc1: warnings being treated as errors
builtin-stat.c: In function 'perf_env__get_die':
builtin-stat.c:963: error: declaration of 'die' shadows a global declaration
util/util.h:19: error: shadowed declaration is here
mv: cannot stat `/tmp/build/perf/.builtin-stat.o.tmp': No such file or directory
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/n/tip-bsnhx7vgsuu6ei307mw60mbj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 87 |
1 files changed, 81 insertions, 6 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 24b8e690fb69..272df8426f0a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -777,6 +777,8 @@ static struct option stat_options[] = { | |||
777 | "stop workload and print counts after a timeout period in ms (>= 10ms)"), | 777 | "stop workload and print counts after a timeout period in ms (>= 10ms)"), |
778 | OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, | 778 | OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, |
779 | "aggregate counts per processor socket", AGGR_SOCKET), | 779 | "aggregate counts per processor socket", AGGR_SOCKET), |
780 | OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode, | ||
781 | "aggregate counts per processor die", AGGR_DIE), | ||
780 | OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, | 782 | OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, |
781 | "aggregate counts per physical processor core", AGGR_CORE), | 783 | "aggregate counts per physical processor core", AGGR_CORE), |
782 | OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, | 784 | OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, |
@@ -801,6 +803,12 @@ static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, | |||
801 | return cpu_map__get_socket(map, cpu, NULL); | 803 | return cpu_map__get_socket(map, cpu, NULL); |
802 | } | 804 | } |
803 | 805 | ||
806 | static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, | ||
807 | struct cpu_map *map, int cpu) | ||
808 | { | ||
809 | return cpu_map__get_die(map, cpu, NULL); | ||
810 | } | ||
811 | |||
804 | static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, | 812 | static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, |
805 | struct cpu_map *map, int cpu) | 813 | struct cpu_map *map, int cpu) |
806 | { | 814 | { |
@@ -841,6 +849,12 @@ static int perf_stat__get_socket_cached(struct perf_stat_config *config, | |||
841 | return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); | 849 | return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); |
842 | } | 850 | } |
843 | 851 | ||
852 | static int perf_stat__get_die_cached(struct perf_stat_config *config, | ||
853 | struct cpu_map *map, int idx) | ||
854 | { | ||
855 | return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); | ||
856 | } | ||
857 | |||
844 | static int perf_stat__get_core_cached(struct perf_stat_config *config, | 858 | static int perf_stat__get_core_cached(struct perf_stat_config *config, |
845 | struct cpu_map *map, int idx) | 859 | struct cpu_map *map, int idx) |
846 | { | 860 | { |
@@ -871,6 +885,13 @@ static int perf_stat_init_aggr_mode(void) | |||
871 | } | 885 | } |
872 | stat_config.aggr_get_id = perf_stat__get_socket_cached; | 886 | stat_config.aggr_get_id = perf_stat__get_socket_cached; |
873 | break; | 887 | break; |
888 | case AGGR_DIE: | ||
889 | if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) { | ||
890 | perror("cannot build die map"); | ||
891 | return -1; | ||
892 | } | ||
893 | stat_config.aggr_get_id = perf_stat__get_die_cached; | ||
894 | break; | ||
874 | case AGGR_CORE: | 895 | case AGGR_CORE: |
875 | if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { | 896 | if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { |
876 | perror("cannot build core map"); | 897 | perror("cannot build core map"); |
@@ -936,21 +957,55 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) | |||
936 | return cpu == -1 ? -1 : env->cpu[cpu].socket_id; | 957 | return cpu == -1 ? -1 : env->cpu[cpu].socket_id; |
937 | } | 958 | } |
938 | 959 | ||
960 | static int perf_env__get_die(struct cpu_map *map, int idx, void *data) | ||
961 | { | ||
962 | struct perf_env *env = data; | ||
963 | int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); | ||
964 | |||
965 | if (cpu != -1) { | ||
966 | /* | ||
967 | * Encode socket in bit range 15:8 | ||
968 | * die_id is relative to socket, | ||
969 | * we need a global id. So we combine | ||
970 | * socket + die id | ||
971 | */ | ||
972 | if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) | ||
973 | return -1; | ||
974 | |||
975 | if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) | ||
976 | return -1; | ||
977 | |||
978 | die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); | ||
979 | } | ||
980 | |||
981 | return die_id; | ||
982 | } | ||
983 | |||
939 | static int perf_env__get_core(struct cpu_map *map, int idx, void *data) | 984 | static int perf_env__get_core(struct cpu_map *map, int idx, void *data) |
940 | { | 985 | { |
941 | struct perf_env *env = data; | 986 | struct perf_env *env = data; |
942 | int core = -1, cpu = perf_env__get_cpu(env, map, idx); | 987 | int core = -1, cpu = perf_env__get_cpu(env, map, idx); |
943 | 988 | ||
944 | if (cpu != -1) { | 989 | if (cpu != -1) { |
945 | int socket_id = env->cpu[cpu].socket_id; | ||
946 | |||
947 | /* | 990 | /* |
948 | * Encode socket in upper 16 bits | 991 | * Encode socket in bit range 31:24 |
949 | * core_id is relative to socket, and | 992 | * encode die id in bit range 23:16 |
993 | * core_id is relative to socket and die, | ||
950 | * we need a global id. So we combine | 994 | * we need a global id. So we combine |
951 | * socket + core id. | 995 | * socket + die id + core id |
952 | */ | 996 | */ |
953 | core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); | 997 | if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) |
998 | return -1; | ||
999 | |||
1000 | if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) | ||
1001 | return -1; | ||
1002 | |||
1003 | if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) | ||
1004 | return -1; | ||
1005 | |||
1006 | core = (env->cpu[cpu].socket_id << 24) | | ||
1007 | (env->cpu[cpu].die_id << 16) | | ||
1008 | (env->cpu[cpu].core_id & 0xffff); | ||
954 | } | 1009 | } |
955 | 1010 | ||
956 | return core; | 1011 | return core; |
@@ -962,6 +1017,12 @@ static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus | |||
962 | return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); | 1017 | return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); |
963 | } | 1018 | } |
964 | 1019 | ||
1020 | static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus, | ||
1021 | struct cpu_map **diep) | ||
1022 | { | ||
1023 | return cpu_map__build_map(cpus, diep, perf_env__get_die, env); | ||
1024 | } | ||
1025 | |||
965 | static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, | 1026 | static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, |
966 | struct cpu_map **corep) | 1027 | struct cpu_map **corep) |
967 | { | 1028 | { |
@@ -973,6 +1034,11 @@ static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_un | |||
973 | { | 1034 | { |
974 | return perf_env__get_socket(map, idx, &perf_stat.session->header.env); | 1035 | return perf_env__get_socket(map, idx, &perf_stat.session->header.env); |
975 | } | 1036 | } |
1037 | static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, | ||
1038 | struct cpu_map *map, int idx) | ||
1039 | { | ||
1040 | return perf_env__get_die(map, idx, &perf_stat.session->header.env); | ||
1041 | } | ||
976 | 1042 | ||
977 | static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, | 1043 | static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, |
978 | struct cpu_map *map, int idx) | 1044 | struct cpu_map *map, int idx) |
@@ -992,6 +1058,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) | |||
992 | } | 1058 | } |
993 | stat_config.aggr_get_id = perf_stat__get_socket_file; | 1059 | stat_config.aggr_get_id = perf_stat__get_socket_file; |
994 | break; | 1060 | break; |
1061 | case AGGR_DIE: | ||
1062 | if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) { | ||
1063 | perror("cannot build die map"); | ||
1064 | return -1; | ||
1065 | } | ||
1066 | stat_config.aggr_get_id = perf_stat__get_die_file; | ||
1067 | break; | ||
995 | case AGGR_CORE: | 1068 | case AGGR_CORE: |
996 | if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { | 1069 | if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { |
997 | perror("cannot build core map"); | 1070 | perror("cannot build core map"); |
@@ -1542,6 +1615,8 @@ static int __cmd_report(int argc, const char **argv) | |||
1542 | OPT_STRING('i', "input", &input_name, "file", "input file name"), | 1615 | OPT_STRING('i', "input", &input_name, "file", "input file name"), |
1543 | OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, | 1616 | OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, |
1544 | "aggregate counts per processor socket", AGGR_SOCKET), | 1617 | "aggregate counts per processor socket", AGGR_SOCKET), |
1618 | OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode, | ||
1619 | "aggregate counts per processor die", AGGR_DIE), | ||
1545 | OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, | 1620 | OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, |
1546 | "aggregate counts per physical processor core", AGGR_CORE), | 1621 | "aggregate counts per physical processor core", AGGR_CORE), |
1547 | OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, | 1622 | OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, |