diff options
author | Kan Liang <kan.liang@linux.intel.com> | 2019-06-04 18:50:42 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2019-06-10 15:19:59 -0400 |
commit | db5742b6849ed7f01d764e6755b8ab2b422f29b2 (patch) | |
tree | 7b302de744f0c1c6b6af182e99754c1a34832153 | |
parent | acae8b36cded0ee62038dedd0a44d54d5d673a96 (diff) |
perf stat: Support per-die aggregation
It is useful to aggregate counts per die. E.g. Uncore becomes die-scope
on Xeon Cascade Lake-AP.
Introduce a new option "--per-die" to support per-die aggregation.
The global id for each core has been changed to socket + die id + core
id. The global id for each die is socket + die id.
Add die information for per-core aggregation. The output of per-core
aggregation will be changed from "S0-C0" to "S0-D0-C0". Any scripts
which rely on the output format of per-core aggregation will probably be
broken.
For 'perf stat record/report', there is no die information when
processing an old perf.data file, so the per-die result will be the
same as the per-socket result.
Committer notes:
Renamed 'die' variable to 'die_id' to fix the build on some systems:
CC /tmp/build/perf/builtin-script.o
cc1: warnings being treated as errors
builtin-stat.c: In function 'perf_env__get_die':
builtin-stat.c:963: error: declaration of 'die' shadows a global declaration
util/util.h:19: error: shadowed declaration is here
mv: cannot stat `/tmp/build/perf/.builtin-stat.o.tmp': No such file or directory
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/n/tip-bsnhx7vgsuu6ei307mw60mbj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 10 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 87 | ||||
-rw-r--r-- | tools/perf/util/cpumap.c | 57 | ||||
-rw-r--r-- | tools/perf/util/cpumap.h | 9 | ||||
-rw-r--r-- | tools/perf/util/stat-display.c | 29 | ||||
-rw-r--r-- | tools/perf/util/stat-shadow.c | 1 | ||||
-rw-r--r-- | tools/perf/util/stat.c | 1 | ||||
-rw-r--r-- | tools/perf/util/stat.h | 1 |
8 files changed, 177 insertions, 18 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 1e312c2672e4..930c51c01201 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -200,6 +200,13 @@ use --per-socket in addition to -a. (system-wide). The output includes the | |||
200 | socket number and the number of online processors on that socket. This is | 200 | socket number and the number of online processors on that socket. This is |
201 | useful to gauge the amount of aggregation. | 201 | useful to gauge the amount of aggregation. |
202 | 202 | ||
203 | --per-die:: | ||
204 | Aggregate counts per processor die for system-wide mode measurements. This | ||
205 | is a useful mode to detect imbalance between dies. To enable this mode, | ||
206 | use --per-die in addition to -a. (system-wide). The output includes the | ||
207 | die number and the number of online processors on that die. This is | ||
208 | useful to gauge the amount of aggregation. | ||
209 | |||
203 | --per-core:: | 210 | --per-core:: |
204 | Aggregate counts per physical processor for system-wide mode measurements. This | 211 | Aggregate counts per physical processor for system-wide mode measurements. This |
205 | is a useful mode to detect imbalance between physical cores. To enable this mode, | 212 | is a useful mode to detect imbalance between physical cores. To enable this mode, |
@@ -239,6 +246,9 @@ Input file name. | |||
239 | --per-socket:: | 246 | --per-socket:: |
240 | Aggregate counts per processor socket for system-wide mode measurements. | 247 | Aggregate counts per processor socket for system-wide mode measurements. |
241 | 248 | ||
249 | --per-die:: | ||
250 | Aggregate counts per processor die for system-wide mode measurements. | ||
251 | |||
242 | --per-core:: | 252 | --per-core:: |
243 | Aggregate counts per physical processor for system-wide mode measurements. | 253 | Aggregate counts per physical processor for system-wide mode measurements. |
244 | 254 | ||
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 24b8e690fb69..272df8426f0a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -777,6 +777,8 @@ static struct option stat_options[] = { | |||
777 | "stop workload and print counts after a timeout period in ms (>= 10ms)"), | 777 | "stop workload and print counts after a timeout period in ms (>= 10ms)"), |
778 | OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, | 778 | OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, |
779 | "aggregate counts per processor socket", AGGR_SOCKET), | 779 | "aggregate counts per processor socket", AGGR_SOCKET), |
780 | OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode, | ||
781 | "aggregate counts per processor die", AGGR_DIE), | ||
780 | OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, | 782 | OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, |
781 | "aggregate counts per physical processor core", AGGR_CORE), | 783 | "aggregate counts per physical processor core", AGGR_CORE), |
782 | OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, | 784 | OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, |
@@ -801,6 +803,12 @@ static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, | |||
801 | return cpu_map__get_socket(map, cpu, NULL); | 803 | return cpu_map__get_socket(map, cpu, NULL); |
802 | } | 804 | } |
803 | 805 | ||
806 | static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, | ||
807 | struct cpu_map *map, int cpu) | ||
808 | { | ||
809 | return cpu_map__get_die(map, cpu, NULL); | ||
810 | } | ||
811 | |||
804 | static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, | 812 | static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, |
805 | struct cpu_map *map, int cpu) | 813 | struct cpu_map *map, int cpu) |
806 | { | 814 | { |
@@ -841,6 +849,12 @@ static int perf_stat__get_socket_cached(struct perf_stat_config *config, | |||
841 | return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); | 849 | return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); |
842 | } | 850 | } |
843 | 851 | ||
852 | static int perf_stat__get_die_cached(struct perf_stat_config *config, | ||
853 | struct cpu_map *map, int idx) | ||
854 | { | ||
855 | return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); | ||
856 | } | ||
857 | |||
844 | static int perf_stat__get_core_cached(struct perf_stat_config *config, | 858 | static int perf_stat__get_core_cached(struct perf_stat_config *config, |
845 | struct cpu_map *map, int idx) | 859 | struct cpu_map *map, int idx) |
846 | { | 860 | { |
@@ -871,6 +885,13 @@ static int perf_stat_init_aggr_mode(void) | |||
871 | } | 885 | } |
872 | stat_config.aggr_get_id = perf_stat__get_socket_cached; | 886 | stat_config.aggr_get_id = perf_stat__get_socket_cached; |
873 | break; | 887 | break; |
888 | case AGGR_DIE: | ||
889 | if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) { | ||
890 | perror("cannot build die map"); | ||
891 | return -1; | ||
892 | } | ||
893 | stat_config.aggr_get_id = perf_stat__get_die_cached; | ||
894 | break; | ||
874 | case AGGR_CORE: | 895 | case AGGR_CORE: |
875 | if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { | 896 | if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { |
876 | perror("cannot build core map"); | 897 | perror("cannot build core map"); |
@@ -936,21 +957,55 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) | |||
936 | return cpu == -1 ? -1 : env->cpu[cpu].socket_id; | 957 | return cpu == -1 ? -1 : env->cpu[cpu].socket_id; |
937 | } | 958 | } |
938 | 959 | ||
960 | static int perf_env__get_die(struct cpu_map *map, int idx, void *data) | ||
961 | { | ||
962 | struct perf_env *env = data; | ||
963 | int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); | ||
964 | |||
965 | if (cpu != -1) { | ||
966 | /* | ||
967 | * Encode socket in bit range 15:8 | ||
968 | * die_id is relative to socket, | ||
969 | * we need a global id. So we combine | ||
970 | * socket + die id | ||
971 | */ | ||
972 | if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) | ||
973 | return -1; | ||
974 | |||
975 | if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) | ||
976 | return -1; | ||
977 | |||
978 | die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); | ||
979 | } | ||
980 | |||
981 | return die_id; | ||
982 | } | ||
983 | |||
939 | static int perf_env__get_core(struct cpu_map *map, int idx, void *data) | 984 | static int perf_env__get_core(struct cpu_map *map, int idx, void *data) |
940 | { | 985 | { |
941 | struct perf_env *env = data; | 986 | struct perf_env *env = data; |
942 | int core = -1, cpu = perf_env__get_cpu(env, map, idx); | 987 | int core = -1, cpu = perf_env__get_cpu(env, map, idx); |
943 | 988 | ||
944 | if (cpu != -1) { | 989 | if (cpu != -1) { |
945 | int socket_id = env->cpu[cpu].socket_id; | ||
946 | |||
947 | /* | 990 | /* |
948 | * Encode socket in upper 16 bits | 991 | * Encode socket in bit range 31:24 |
949 | * core_id is relative to socket, and | 992 | * encode die id in bit range 23:16 |
993 | * core_id is relative to socket and die, | ||
950 | * we need a global id. So we combine | 994 | * we need a global id. So we combine |
951 | * socket + core id. | 995 | * socket + die id + core id |
952 | */ | 996 | */ |
953 | core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); | 997 | if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) |
998 | return -1; | ||
999 | |||
1000 | if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) | ||
1001 | return -1; | ||
1002 | |||
1003 | if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) | ||
1004 | return -1; | ||
1005 | |||
1006 | core = (env->cpu[cpu].socket_id << 24) | | ||
1007 | (env->cpu[cpu].die_id << 16) | | ||
1008 | (env->cpu[cpu].core_id & 0xffff); | ||
954 | } | 1009 | } |
955 | 1010 | ||
956 | return core; | 1011 | return core; |
@@ -962,6 +1017,12 @@ static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus | |||
962 | return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); | 1017 | return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); |
963 | } | 1018 | } |
964 | 1019 | ||
1020 | static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus, | ||
1021 | struct cpu_map **diep) | ||
1022 | { | ||
1023 | return cpu_map__build_map(cpus, diep, perf_env__get_die, env); | ||
1024 | } | ||
1025 | |||
965 | static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, | 1026 | static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, |
966 | struct cpu_map **corep) | 1027 | struct cpu_map **corep) |
967 | { | 1028 | { |
@@ -973,6 +1034,11 @@ static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_un | |||
973 | { | 1034 | { |
974 | return perf_env__get_socket(map, idx, &perf_stat.session->header.env); | 1035 | return perf_env__get_socket(map, idx, &perf_stat.session->header.env); |
975 | } | 1036 | } |
1037 | static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, | ||
1038 | struct cpu_map *map, int idx) | ||
1039 | { | ||
1040 | return perf_env__get_die(map, idx, &perf_stat.session->header.env); | ||
1041 | } | ||
976 | 1042 | ||
977 | static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, | 1043 | static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, |
978 | struct cpu_map *map, int idx) | 1044 | struct cpu_map *map, int idx) |
@@ -992,6 +1058,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) | |||
992 | } | 1058 | } |
993 | stat_config.aggr_get_id = perf_stat__get_socket_file; | 1059 | stat_config.aggr_get_id = perf_stat__get_socket_file; |
994 | break; | 1060 | break; |
1061 | case AGGR_DIE: | ||
1062 | if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) { | ||
1063 | perror("cannot build die map"); | ||
1064 | return -1; | ||
1065 | } | ||
1066 | stat_config.aggr_get_id = perf_stat__get_die_file; | ||
1067 | break; | ||
995 | case AGGR_CORE: | 1068 | case AGGR_CORE: |
996 | if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { | 1069 | if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { |
997 | perror("cannot build core map"); | 1070 | perror("cannot build core map"); |
@@ -1542,6 +1615,8 @@ static int __cmd_report(int argc, const char **argv) | |||
1542 | OPT_STRING('i', "input", &input_name, "file", "input file name"), | 1615 | OPT_STRING('i', "input", &input_name, "file", "input file name"), |
1543 | OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, | 1616 | OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, |
1544 | "aggregate counts per processor socket", AGGR_SOCKET), | 1617 | "aggregate counts per processor socket", AGGR_SOCKET), |
1618 | OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode, | ||
1619 | "aggregate counts per processor die", AGGR_DIE), | ||
1545 | OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, | 1620 | OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, |
1546 | "aggregate counts per physical processor core", AGGR_CORE), | 1621 | "aggregate counts per physical processor core", AGGR_CORE), |
1547 | OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, | 1622 | OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, |
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 7db1365c667e..c11a459ca582 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c | |||
@@ -380,6 +380,39 @@ int cpu_map__get_die_id(int cpu) | |||
380 | return ret ?: value; | 380 | return ret ?: value; |
381 | } | 381 | } |
382 | 382 | ||
383 | int cpu_map__get_die(struct cpu_map *map, int idx, void *data) | ||
384 | { | ||
385 | int cpu, die_id, s; | ||
386 | |||
387 | if (idx > map->nr) | ||
388 | return -1; | ||
389 | |||
390 | cpu = map->map[idx]; | ||
391 | |||
392 | die_id = cpu_map__get_die_id(cpu); | ||
393 | /* There is no die_id on legacy system. */ | ||
394 | if (die_id == -1) | ||
395 | die_id = 0; | ||
396 | |||
397 | s = cpu_map__get_socket(map, idx, data); | ||
398 | if (s == -1) | ||
399 | return -1; | ||
400 | |||
401 | /* | ||
402 | * Encode socket in bit range 15:8 | ||
403 | * die_id is relative to socket, and | ||
404 | * we need a global id. So we combine | ||
405 | * socket + die id | ||
406 | */ | ||
407 | if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n")) | ||
408 | return -1; | ||
409 | |||
410 | if (WARN_ONCE(s >> 8, "The socket id number is too big.\n")) | ||
411 | return -1; | ||
412 | |||
413 | return (s << 8) | (die_id & 0xff); | ||
414 | } | ||
415 | |||
383 | int cpu_map__get_core_id(int cpu) | 416 | int cpu_map__get_core_id(int cpu) |
384 | { | 417 | { |
385 | int value, ret = cpu__get_topology_int(cpu, "core_id", &value); | 418 | int value, ret = cpu__get_topology_int(cpu, "core_id", &value); |
@@ -388,7 +421,7 @@ int cpu_map__get_core_id(int cpu) | |||
388 | 421 | ||
389 | int cpu_map__get_core(struct cpu_map *map, int idx, void *data) | 422 | int cpu_map__get_core(struct cpu_map *map, int idx, void *data) |
390 | { | 423 | { |
391 | int cpu, s; | 424 | int cpu, s_die; |
392 | 425 | ||
393 | if (idx > map->nr) | 426 | if (idx > map->nr) |
394 | return -1; | 427 | return -1; |
@@ -397,17 +430,22 @@ int cpu_map__get_core(struct cpu_map *map, int idx, void *data) | |||
397 | 430 | ||
398 | cpu = cpu_map__get_core_id(cpu); | 431 | cpu = cpu_map__get_core_id(cpu); |
399 | 432 | ||
400 | s = cpu_map__get_socket(map, idx, data); | 433 | /* s_die is the combination of socket + die id */ |
401 | if (s == -1) | 434 | s_die = cpu_map__get_die(map, idx, data); |
435 | if (s_die == -1) | ||
402 | return -1; | 436 | return -1; |
403 | 437 | ||
404 | /* | 438 | /* |
405 | * encode socket in upper 16 bits | 439 | * encode socket in bit range 31:24 |
406 | * core_id is relative to socket, and | 440 | * encode die id in bit range 23:16 |
441 | * core_id is relative to socket and die, | ||
407 | * we need a global id. So we combine | 442 | * we need a global id. So we combine |
408 | * socket+ core id | 443 | * socket + die id + core id |
409 | */ | 444 | */ |
410 | return (s << 16) | (cpu & 0xffff); | 445 | if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) |
446 | return -1; | ||
447 | |||
448 | return (s_die << 16) | (cpu & 0xffff); | ||
411 | } | 449 | } |
412 | 450 | ||
413 | int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) | 451 | int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) |
@@ -415,6 +453,11 @@ int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) | |||
415 | return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); | 453 | return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); |
416 | } | 454 | } |
417 | 455 | ||
456 | int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep) | ||
457 | { | ||
458 | return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); | ||
459 | } | ||
460 | |||
418 | int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep) | 461 | int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep) |
419 | { | 462 | { |
420 | return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); | 463 | return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); |
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 6762ff9e7ad5..1265f0e33920 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h | |||
@@ -26,9 +26,11 @@ size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); | |||
26 | int cpu_map__get_socket_id(int cpu); | 26 | int cpu_map__get_socket_id(int cpu); |
27 | int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); | 27 | int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); |
28 | int cpu_map__get_die_id(int cpu); | 28 | int cpu_map__get_die_id(int cpu); |
29 | int cpu_map__get_die(struct cpu_map *map, int idx, void *data); | ||
29 | int cpu_map__get_core_id(int cpu); | 30 | int cpu_map__get_core_id(int cpu); |
30 | int cpu_map__get_core(struct cpu_map *map, int idx, void *data); | 31 | int cpu_map__get_core(struct cpu_map *map, int idx, void *data); |
31 | int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); | 32 | int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); |
33 | int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep); | ||
32 | int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); | 34 | int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); |
33 | const struct cpu_map *cpu_map__online(void); /* thread unsafe */ | 35 | const struct cpu_map *cpu_map__online(void); /* thread unsafe */ |
34 | 36 | ||
@@ -44,7 +46,12 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s) | |||
44 | 46 | ||
45 | static inline int cpu_map__id_to_socket(int id) | 47 | static inline int cpu_map__id_to_socket(int id) |
46 | { | 48 | { |
47 | return id >> 16; | 49 | return id >> 24; |
50 | } | ||
51 | |||
52 | static inline int cpu_map__id_to_die(int id) | ||
53 | { | ||
54 | return (id >> 16) & 0xff; | ||
48 | } | 55 | } |
49 | 56 | ||
50 | static inline int cpu_map__id_to_cpu(int id) | 57 | static inline int cpu_map__id_to_cpu(int id) |
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 4c53bae5644b..a6b9de3e83fc 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c | |||
@@ -69,8 +69,9 @@ static void aggr_printout(struct perf_stat_config *config, | |||
69 | { | 69 | { |
70 | switch (config->aggr_mode) { | 70 | switch (config->aggr_mode) { |
71 | case AGGR_CORE: | 71 | case AGGR_CORE: |
72 | fprintf(config->output, "S%d-C%*d%s%*d%s", | 72 | fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", |
73 | cpu_map__id_to_socket(id), | 73 | cpu_map__id_to_socket(id), |
74 | cpu_map__id_to_die(id), | ||
74 | config->csv_output ? 0 : -8, | 75 | config->csv_output ? 0 : -8, |
75 | cpu_map__id_to_cpu(id), | 76 | cpu_map__id_to_cpu(id), |
76 | config->csv_sep, | 77 | config->csv_sep, |
@@ -78,6 +79,16 @@ static void aggr_printout(struct perf_stat_config *config, | |||
78 | nr, | 79 | nr, |
79 | config->csv_sep); | 80 | config->csv_sep); |
80 | break; | 81 | break; |
82 | case AGGR_DIE: | ||
83 | fprintf(config->output, "S%d-D%*d%s%*d%s", | ||
84 | cpu_map__id_to_socket(id << 16), | ||
85 | config->csv_output ? 0 : -8, | ||
86 | cpu_map__id_to_die(id << 16), | ||
87 | config->csv_sep, | ||
88 | config->csv_output ? 0 : 4, | ||
89 | nr, | ||
90 | config->csv_sep); | ||
91 | break; | ||
81 | case AGGR_SOCKET: | 92 | case AGGR_SOCKET: |
82 | fprintf(config->output, "S%*d%s%*d%s", | 93 | fprintf(config->output, "S%*d%s%*d%s", |
83 | config->csv_output ? 0 : -5, | 94 | config->csv_output ? 0 : -5, |
@@ -89,8 +100,9 @@ static void aggr_printout(struct perf_stat_config *config, | |||
89 | break; | 100 | break; |
90 | case AGGR_NONE: | 101 | case AGGR_NONE: |
91 | if (evsel->percore) { | 102 | if (evsel->percore) { |
92 | fprintf(config->output, "S%d-C%*d%s", | 103 | fprintf(config->output, "S%d-D%d-C%*d%s", |
93 | cpu_map__id_to_socket(id), | 104 | cpu_map__id_to_socket(id), |
105 | cpu_map__id_to_die(id), | ||
94 | config->csv_output ? 0 : -5, | 106 | config->csv_output ? 0 : -5, |
95 | cpu_map__id_to_cpu(id), config->csv_sep); | 107 | cpu_map__id_to_cpu(id), config->csv_sep); |
96 | } else { | 108 | } else { |
@@ -407,6 +419,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, | |||
407 | [AGGR_THREAD] = 1, | 419 | [AGGR_THREAD] = 1, |
408 | [AGGR_NONE] = 1, | 420 | [AGGR_NONE] = 1, |
409 | [AGGR_SOCKET] = 2, | 421 | [AGGR_SOCKET] = 2, |
422 | [AGGR_DIE] = 2, | ||
410 | [AGGR_CORE] = 2, | 423 | [AGGR_CORE] = 2, |
411 | }; | 424 | }; |
412 | 425 | ||
@@ -879,7 +892,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, | |||
879 | } | 892 | } |
880 | 893 | ||
881 | static int aggr_header_lens[] = { | 894 | static int aggr_header_lens[] = { |
882 | [AGGR_CORE] = 18, | 895 | [AGGR_CORE] = 24, |
896 | [AGGR_DIE] = 18, | ||
883 | [AGGR_SOCKET] = 12, | 897 | [AGGR_SOCKET] = 12, |
884 | [AGGR_NONE] = 6, | 898 | [AGGR_NONE] = 6, |
885 | [AGGR_THREAD] = 24, | 899 | [AGGR_THREAD] = 24, |
@@ -888,6 +902,7 @@ static int aggr_header_lens[] = { | |||
888 | 902 | ||
889 | static const char *aggr_header_csv[] = { | 903 | static const char *aggr_header_csv[] = { |
890 | [AGGR_CORE] = "core,cpus,", | 904 | [AGGR_CORE] = "core,cpus,", |
905 | [AGGR_DIE] = "die,cpus", | ||
891 | [AGGR_SOCKET] = "socket,cpus", | 906 | [AGGR_SOCKET] = "socket,cpus", |
892 | [AGGR_NONE] = "cpu,", | 907 | [AGGR_NONE] = "cpu,", |
893 | [AGGR_THREAD] = "comm-pid,", | 908 | [AGGR_THREAD] = "comm-pid,", |
@@ -954,8 +969,13 @@ static void print_interval(struct perf_stat_config *config, | |||
954 | if (!metric_only) | 969 | if (!metric_only) |
955 | fprintf(output, " counts %*s events\n", unit_width, "unit"); | 970 | fprintf(output, " counts %*s events\n", unit_width, "unit"); |
956 | break; | 971 | break; |
972 | case AGGR_DIE: | ||
973 | fprintf(output, "# time die cpus"); | ||
974 | if (!metric_only) | ||
975 | fprintf(output, " counts %*s events\n", unit_width, "unit"); | ||
976 | break; | ||
957 | case AGGR_CORE: | 977 | case AGGR_CORE: |
958 | fprintf(output, "# time core cpus"); | 978 | fprintf(output, "# time core cpus"); |
959 | if (!metric_only) | 979 | if (!metric_only) |
960 | fprintf(output, " counts %*s events\n", unit_width, "unit"); | 980 | fprintf(output, " counts %*s events\n", unit_width, "unit"); |
961 | break; | 981 | break; |
@@ -1165,6 +1185,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, | |||
1165 | 1185 | ||
1166 | switch (config->aggr_mode) { | 1186 | switch (config->aggr_mode) { |
1167 | case AGGR_CORE: | 1187 | case AGGR_CORE: |
1188 | case AGGR_DIE: | ||
1168 | case AGGR_SOCKET: | 1189 | case AGGR_SOCKET: |
1169 | print_aggr(config, evlist, prefix); | 1190 | print_aggr(config, evlist, prefix); |
1170 | break; | 1191 | break; |
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 83d8094be4fe..027b09aaa4cf 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c | |||
@@ -12,6 +12,7 @@ | |||
12 | /* | 12 | /* |
13 | * AGGR_GLOBAL: Use CPU 0 | 13 | * AGGR_GLOBAL: Use CPU 0 |
14 | * AGGR_SOCKET: Use first CPU of socket | 14 | * AGGR_SOCKET: Use first CPU of socket |
15 | * AGGR_DIE: Use first CPU of die | ||
15 | * AGGR_CORE: Use first CPU of core | 16 | * AGGR_CORE: Use first CPU of core |
16 | * AGGR_NONE: Use matching CPU | 17 | * AGGR_NONE: Use matching CPU |
17 | * AGGR_THREAD: Not supported? | 18 | * AGGR_THREAD: Not supported? |
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c3115d939b0b..d91fe754b6d2 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c | |||
@@ -272,6 +272,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel | |||
272 | switch (config->aggr_mode) { | 272 | switch (config->aggr_mode) { |
273 | case AGGR_THREAD: | 273 | case AGGR_THREAD: |
274 | case AGGR_CORE: | 274 | case AGGR_CORE: |
275 | case AGGR_DIE: | ||
275 | case AGGR_SOCKET: | 276 | case AGGR_SOCKET: |
276 | case AGGR_NONE: | 277 | case AGGR_NONE: |
277 | if (!evsel->snapshot) | 278 | if (!evsel->snapshot) |
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2f9c9159a364..7032dd1eeac2 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h | |||
@@ -44,6 +44,7 @@ enum aggr_mode { | |||
44 | AGGR_NONE, | 44 | AGGR_NONE, |
45 | AGGR_GLOBAL, | 45 | AGGR_GLOBAL, |
46 | AGGR_SOCKET, | 46 | AGGR_SOCKET, |
47 | AGGR_DIE, | ||
47 | AGGR_CORE, | 48 | AGGR_CORE, |
48 | AGGR_THREAD, | 49 | AGGR_THREAD, |
49 | AGGR_UNSET, | 50 | AGGR_UNSET, |