summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKan Liang <kan.liang@linux.intel.com>2019-06-04 18:50:42 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2019-06-10 15:19:59 -0400
commitdb5742b6849ed7f01d764e6755b8ab2b422f29b2 (patch)
tree7b302de744f0c1c6b6af182e99754c1a34832153
parentacae8b36cded0ee62038dedd0a44d54d5d673a96 (diff)
perf stat: Support per-die aggregation
It is useful to aggregate counts per die. E.g. Uncore becomes die-scope on Xeon Cascade Lake-AP. Introduce a new option "--per-die" to support per-die aggregation. The global id for each core has been changed to socket + die id + core id. The global id for each die is socket + die id. Add die information for per-core aggregation. The output of per-core aggregation will be changed from "S0-C0" to "S0-D0-C0". Any scripts which rely on the output format of per-core aggregation will probably be broken. For 'perf stat record/report', there is no die information when processing an old perf.data. The per-die result will be the same as per-socket. Committer notes: Renamed 'die' variable to 'die_id' to fix the build on some systems: CC /tmp/build/perf/builtin-script.o cc1: warnings being treated as errors builtin-stat.c: In function 'perf_env__get_die': builtin-stat.c:963: error: declaration of 'die' shadows a global declaration util/util.h:19: error: shadowed declaration is here mv: cannot stat `/tmp/build/perf/.builtin-stat.o.tmp': No such file or directory Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: https://lkml.kernel.org/n/tip-bsnhx7vgsuu6ei307mw60mbj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/Documentation/perf-stat.txt10
-rw-r--r--tools/perf/builtin-stat.c87
-rw-r--r--tools/perf/util/cpumap.c57
-rw-r--r--tools/perf/util/cpumap.h9
-rw-r--r--tools/perf/util/stat-display.c29
-rw-r--r--tools/perf/util/stat-shadow.c1
-rw-r--r--tools/perf/util/stat.c1
-rw-r--r--tools/perf/util/stat.h1
8 files changed, 177 insertions, 18 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 1e312c2672e4..930c51c01201 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -200,6 +200,13 @@ use --per-socket in addition to -a. (system-wide). The output includes the
200socket number and the number of online processors on that socket. This is 200socket number and the number of online processors on that socket. This is
201useful to gauge the amount of aggregation. 201useful to gauge the amount of aggregation.
202 202
203--per-die::
204Aggregate counts per processor die for system-wide mode measurements. This
205is a useful mode to detect imbalance between dies. To enable this mode,
206use --per-die in addition to -a (system-wide). The output includes the
207die number and the number of online processors on that die. This is
208useful to gauge the amount of aggregation.
209
203--per-core:: 210--per-core::
204Aggregate counts per physical processor for system-wide mode measurements. This 211Aggregate counts per physical processor for system-wide mode measurements. This
205is a useful mode to detect imbalance between physical cores. To enable this mode, 212is a useful mode to detect imbalance between physical cores. To enable this mode,
@@ -239,6 +246,9 @@ Input file name.
239--per-socket:: 246--per-socket::
240Aggregate counts per processor socket for system-wide mode measurements. 247Aggregate counts per processor socket for system-wide mode measurements.
241 248
249--per-die::
250Aggregate counts per processor die for system-wide mode measurements.
251
242--per-core:: 252--per-core::
243Aggregate counts per physical processor for system-wide mode measurements. 253Aggregate counts per physical processor for system-wide mode measurements.
244 254
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 24b8e690fb69..272df8426f0a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -777,6 +777,8 @@ static struct option stat_options[] = {
777 "stop workload and print counts after a timeout period in ms (>= 10ms)"), 777 "stop workload and print counts after a timeout period in ms (>= 10ms)"),
778 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 778 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
779 "aggregate counts per processor socket", AGGR_SOCKET), 779 "aggregate counts per processor socket", AGGR_SOCKET),
780 OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
781 "aggregate counts per processor die", AGGR_DIE),
780 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 782 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
781 "aggregate counts per physical processor core", AGGR_CORE), 783 "aggregate counts per physical processor core", AGGR_CORE),
782 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 784 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
@@ -801,6 +803,12 @@ static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
801 return cpu_map__get_socket(map, cpu, NULL); 803 return cpu_map__get_socket(map, cpu, NULL);
802} 804}
803 805
806static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
807 struct cpu_map *map, int cpu)
808{
809 return cpu_map__get_die(map, cpu, NULL);
810}
811
804static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, 812static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
805 struct cpu_map *map, int cpu) 813 struct cpu_map *map, int cpu)
806{ 814{
@@ -841,6 +849,12 @@ static int perf_stat__get_socket_cached(struct perf_stat_config *config,
841 return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); 849 return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
842} 850}
843 851
852static int perf_stat__get_die_cached(struct perf_stat_config *config,
853 struct cpu_map *map, int idx)
854{
855 return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
856}
857
844static int perf_stat__get_core_cached(struct perf_stat_config *config, 858static int perf_stat__get_core_cached(struct perf_stat_config *config,
845 struct cpu_map *map, int idx) 859 struct cpu_map *map, int idx)
846{ 860{
@@ -871,6 +885,13 @@ static int perf_stat_init_aggr_mode(void)
871 } 885 }
872 stat_config.aggr_get_id = perf_stat__get_socket_cached; 886 stat_config.aggr_get_id = perf_stat__get_socket_cached;
873 break; 887 break;
888 case AGGR_DIE:
889 if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) {
890 perror("cannot build die map");
891 return -1;
892 }
893 stat_config.aggr_get_id = perf_stat__get_die_cached;
894 break;
874 case AGGR_CORE: 895 case AGGR_CORE:
875 if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { 896 if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) {
876 perror("cannot build core map"); 897 perror("cannot build core map");
@@ -936,21 +957,55 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
936 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 957 return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
937} 958}
938 959
960static int perf_env__get_die(struct cpu_map *map, int idx, void *data)
961{
962 struct perf_env *env = data;
963 int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
964
965 if (cpu != -1) {
966 /*
967 * Encode socket in bit range 15:8
968 * die_id is relative to socket,
969 * we need a global id. So we combine
970 * socket + die id
971 */
972 if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
973 return -1;
974
975 if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
976 return -1;
977
978 die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
979 }
980
981 return die_id;
982}
983
939static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 984static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
940{ 985{
941 struct perf_env *env = data; 986 struct perf_env *env = data;
942 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 987 int core = -1, cpu = perf_env__get_cpu(env, map, idx);
943 988
944 if (cpu != -1) { 989 if (cpu != -1) {
945 int socket_id = env->cpu[cpu].socket_id;
946
947 /* 990 /*
948 * Encode socket in upper 16 bits 991 * Encode socket in bit range 31:24
949 * core_id is relative to socket, and 992 * encode die id in bit range 23:16
993 * core_id is relative to socket and die,
950 * we need a global id. So we combine 994 * we need a global id. So we combine
951 * socket + core id. 995 * socket + die id + core id
952 */ 996 */
953 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 997 if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
998 return -1;
999
1000 if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
1001 return -1;
1002
1003 if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
1004 return -1;
1005
1006 core = (env->cpu[cpu].socket_id << 24) |
1007 (env->cpu[cpu].die_id << 16) |
1008 (env->cpu[cpu].core_id & 0xffff);
954 } 1009 }
955 1010
956 return core; 1011 return core;
@@ -962,6 +1017,12 @@ static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus
962 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1017 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
963} 1018}
964 1019
1020static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus,
1021 struct cpu_map **diep)
1022{
1023 return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
1024}
1025
965static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1026static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
966 struct cpu_map **corep) 1027 struct cpu_map **corep)
967{ 1028{
@@ -973,6 +1034,11 @@ static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_un
973{ 1034{
974 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1035 return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
975} 1036}
1037static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
1038 struct cpu_map *map, int idx)
1039{
1040 return perf_env__get_die(map, idx, &perf_stat.session->header.env);
1041}
976 1042
977static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, 1043static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
978 struct cpu_map *map, int idx) 1044 struct cpu_map *map, int idx)
@@ -992,6 +1058,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
992 } 1058 }
993 stat_config.aggr_get_id = perf_stat__get_socket_file; 1059 stat_config.aggr_get_id = perf_stat__get_socket_file;
994 break; 1060 break;
1061 case AGGR_DIE:
1062 if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
1063 perror("cannot build die map");
1064 return -1;
1065 }
1066 stat_config.aggr_get_id = perf_stat__get_die_file;
1067 break;
995 case AGGR_CORE: 1068 case AGGR_CORE:
996 if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { 1069 if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
997 perror("cannot build core map"); 1070 perror("cannot build core map");
@@ -1542,6 +1615,8 @@ static int __cmd_report(int argc, const char **argv)
1542 OPT_STRING('i', "input", &input_name, "file", "input file name"), 1615 OPT_STRING('i', "input", &input_name, "file", "input file name"),
1543 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 1616 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
1544 "aggregate counts per processor socket", AGGR_SOCKET), 1617 "aggregate counts per processor socket", AGGR_SOCKET),
1618 OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
1619 "aggregate counts per processor die", AGGR_DIE),
1545 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 1620 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
1546 "aggregate counts per physical processor core", AGGR_CORE), 1621 "aggregate counts per physical processor core", AGGR_CORE),
1547 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 1622 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 7db1365c667e..c11a459ca582 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -380,6 +380,39 @@ int cpu_map__get_die_id(int cpu)
380 return ret ?: value; 380 return ret ?: value;
381} 381}
382 382
383int cpu_map__get_die(struct cpu_map *map, int idx, void *data)
384{
385 int cpu, die_id, s;
386
387 if (idx > map->nr)
388 return -1;
389
390 cpu = map->map[idx];
391
392 die_id = cpu_map__get_die_id(cpu);
393 /* There is no die_id on legacy system. */
394 if (die_id == -1)
395 die_id = 0;
396
397 s = cpu_map__get_socket(map, idx, data);
398 if (s == -1)
399 return -1;
400
401 /*
402 * Encode socket in bit range 15:8
403 * die_id is relative to socket, and
404 * we need a global id. So we combine
405 * socket + die id
406 */
407 if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
408 return -1;
409
410 if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
411 return -1;
412
413 return (s << 8) | (die_id & 0xff);
414}
415
383int cpu_map__get_core_id(int cpu) 416int cpu_map__get_core_id(int cpu)
384{ 417{
385 int value, ret = cpu__get_topology_int(cpu, "core_id", &value); 418 int value, ret = cpu__get_topology_int(cpu, "core_id", &value);
@@ -388,7 +421,7 @@ int cpu_map__get_core_id(int cpu)
388 421
389int cpu_map__get_core(struct cpu_map *map, int idx, void *data) 422int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
390{ 423{
391 int cpu, s; 424 int cpu, s_die;
392 425
393 if (idx > map->nr) 426 if (idx > map->nr)
394 return -1; 427 return -1;
@@ -397,17 +430,22 @@ int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
397 430
398 cpu = cpu_map__get_core_id(cpu); 431 cpu = cpu_map__get_core_id(cpu);
399 432
400 s = cpu_map__get_socket(map, idx, data); 433 /* s_die is the combination of socket + die id */
401 if (s == -1) 434 s_die = cpu_map__get_die(map, idx, data);
435 if (s_die == -1)
402 return -1; 436 return -1;
403 437
404 /* 438 /*
405 * encode socket in upper 16 bits 439 * encode socket in bit range 31:24
406 * core_id is relative to socket, and 440 * encode die id in bit range 23:16
441 * core_id is relative to socket and die,
407 * we need a global id. So we combine 442 * we need a global id. So we combine
408 * socket+ core id 443 * socket + die id + core id
409 */ 444 */
410 return (s << 16) | (cpu & 0xffff); 445 if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n"))
446 return -1;
447
448 return (s_die << 16) | (cpu & 0xffff);
411} 449}
412 450
413int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) 451int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
@@ -415,6 +453,11 @@ int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
415 return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); 453 return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
416} 454}
417 455
456int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep)
457{
458 return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL);
459}
460
418int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep) 461int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
419{ 462{
420 return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); 463 return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 6762ff9e7ad5..1265f0e33920 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -26,9 +26,11 @@ size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
26int cpu_map__get_socket_id(int cpu); 26int cpu_map__get_socket_id(int cpu);
27int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); 27int cpu_map__get_socket(struct cpu_map *map, int idx, void *data);
28int cpu_map__get_die_id(int cpu); 28int cpu_map__get_die_id(int cpu);
29int cpu_map__get_die(struct cpu_map *map, int idx, void *data);
29int cpu_map__get_core_id(int cpu); 30int cpu_map__get_core_id(int cpu);
30int cpu_map__get_core(struct cpu_map *map, int idx, void *data); 31int cpu_map__get_core(struct cpu_map *map, int idx, void *data);
31int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); 32int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
33int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep);
32int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); 34int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
33const struct cpu_map *cpu_map__online(void); /* thread unsafe */ 35const struct cpu_map *cpu_map__online(void); /* thread unsafe */
34 36
@@ -44,7 +46,12 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s)
44 46
45static inline int cpu_map__id_to_socket(int id) 47static inline int cpu_map__id_to_socket(int id)
46{ 48{
47 return id >> 16; 49 return id >> 24;
50}
51
52static inline int cpu_map__id_to_die(int id)
53{
54 return (id >> 16) & 0xff;
48} 55}
49 56
50static inline int cpu_map__id_to_cpu(int id) 57static inline int cpu_map__id_to_cpu(int id)
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 4c53bae5644b..a6b9de3e83fc 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -69,8 +69,9 @@ static void aggr_printout(struct perf_stat_config *config,
69{ 69{
70 switch (config->aggr_mode) { 70 switch (config->aggr_mode) {
71 case AGGR_CORE: 71 case AGGR_CORE:
72 fprintf(config->output, "S%d-C%*d%s%*d%s", 72 fprintf(config->output, "S%d-D%d-C%*d%s%*d%s",
73 cpu_map__id_to_socket(id), 73 cpu_map__id_to_socket(id),
74 cpu_map__id_to_die(id),
74 config->csv_output ? 0 : -8, 75 config->csv_output ? 0 : -8,
75 cpu_map__id_to_cpu(id), 76 cpu_map__id_to_cpu(id),
76 config->csv_sep, 77 config->csv_sep,
@@ -78,6 +79,16 @@ static void aggr_printout(struct perf_stat_config *config,
78 nr, 79 nr,
79 config->csv_sep); 80 config->csv_sep);
80 break; 81 break;
82 case AGGR_DIE:
83 fprintf(config->output, "S%d-D%*d%s%*d%s",
84 cpu_map__id_to_socket(id << 16),
85 config->csv_output ? 0 : -8,
86 cpu_map__id_to_die(id << 16),
87 config->csv_sep,
88 config->csv_output ? 0 : 4,
89 nr,
90 config->csv_sep);
91 break;
81 case AGGR_SOCKET: 92 case AGGR_SOCKET:
82 fprintf(config->output, "S%*d%s%*d%s", 93 fprintf(config->output, "S%*d%s%*d%s",
83 config->csv_output ? 0 : -5, 94 config->csv_output ? 0 : -5,
@@ -89,8 +100,9 @@ static void aggr_printout(struct perf_stat_config *config,
89 break; 100 break;
90 case AGGR_NONE: 101 case AGGR_NONE:
91 if (evsel->percore) { 102 if (evsel->percore) {
92 fprintf(config->output, "S%d-C%*d%s", 103 fprintf(config->output, "S%d-D%d-C%*d%s",
93 cpu_map__id_to_socket(id), 104 cpu_map__id_to_socket(id),
105 cpu_map__id_to_die(id),
94 config->csv_output ? 0 : -5, 106 config->csv_output ? 0 : -5,
95 cpu_map__id_to_cpu(id), config->csv_sep); 107 cpu_map__id_to_cpu(id), config->csv_sep);
96 } else { 108 } else {
@@ -407,6 +419,7 @@ static void printout(struct perf_stat_config *config, int id, int nr,
407 [AGGR_THREAD] = 1, 419 [AGGR_THREAD] = 1,
408 [AGGR_NONE] = 1, 420 [AGGR_NONE] = 1,
409 [AGGR_SOCKET] = 2, 421 [AGGR_SOCKET] = 2,
422 [AGGR_DIE] = 2,
410 [AGGR_CORE] = 2, 423 [AGGR_CORE] = 2,
411 }; 424 };
412 425
@@ -879,7 +892,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
879} 892}
880 893
881static int aggr_header_lens[] = { 894static int aggr_header_lens[] = {
882 [AGGR_CORE] = 18, 895 [AGGR_CORE] = 24,
896 [AGGR_DIE] = 18,
883 [AGGR_SOCKET] = 12, 897 [AGGR_SOCKET] = 12,
884 [AGGR_NONE] = 6, 898 [AGGR_NONE] = 6,
885 [AGGR_THREAD] = 24, 899 [AGGR_THREAD] = 24,
@@ -888,6 +902,7 @@ static int aggr_header_lens[] = {
888 902
889static const char *aggr_header_csv[] = { 903static const char *aggr_header_csv[] = {
890 [AGGR_CORE] = "core,cpus,", 904 [AGGR_CORE] = "core,cpus,",
905 [AGGR_DIE] = "die,cpus",
891 [AGGR_SOCKET] = "socket,cpus", 906 [AGGR_SOCKET] = "socket,cpus",
892 [AGGR_NONE] = "cpu,", 907 [AGGR_NONE] = "cpu,",
893 [AGGR_THREAD] = "comm-pid,", 908 [AGGR_THREAD] = "comm-pid,",
@@ -954,8 +969,13 @@ static void print_interval(struct perf_stat_config *config,
954 if (!metric_only) 969 if (!metric_only)
955 fprintf(output, " counts %*s events\n", unit_width, "unit"); 970 fprintf(output, " counts %*s events\n", unit_width, "unit");
956 break; 971 break;
972 case AGGR_DIE:
973 fprintf(output, "# time die cpus");
974 if (!metric_only)
975 fprintf(output, " counts %*s events\n", unit_width, "unit");
976 break;
957 case AGGR_CORE: 977 case AGGR_CORE:
958 fprintf(output, "# time core cpus"); 978 fprintf(output, "# time core cpus");
959 if (!metric_only) 979 if (!metric_only)
960 fprintf(output, " counts %*s events\n", unit_width, "unit"); 980 fprintf(output, " counts %*s events\n", unit_width, "unit");
961 break; 981 break;
@@ -1165,6 +1185,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist,
1165 1185
1166 switch (config->aggr_mode) { 1186 switch (config->aggr_mode) {
1167 case AGGR_CORE: 1187 case AGGR_CORE:
1188 case AGGR_DIE:
1168 case AGGR_SOCKET: 1189 case AGGR_SOCKET:
1169 print_aggr(config, evlist, prefix); 1190 print_aggr(config, evlist, prefix);
1170 break; 1191 break;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 83d8094be4fe..027b09aaa4cf 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -12,6 +12,7 @@
12/* 12/*
13 * AGGR_GLOBAL: Use CPU 0 13 * AGGR_GLOBAL: Use CPU 0
14 * AGGR_SOCKET: Use first CPU of socket 14 * AGGR_SOCKET: Use first CPU of socket
15 * AGGR_DIE: Use first CPU of die
15 * AGGR_CORE: Use first CPU of core 16 * AGGR_CORE: Use first CPU of core
16 * AGGR_NONE: Use matching CPU 17 * AGGR_NONE: Use matching CPU
17 * AGGR_THREAD: Not supported? 18 * AGGR_THREAD: Not supported?
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c3115d939b0b..d91fe754b6d2 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -272,6 +272,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
272 switch (config->aggr_mode) { 272 switch (config->aggr_mode) {
273 case AGGR_THREAD: 273 case AGGR_THREAD:
274 case AGGR_CORE: 274 case AGGR_CORE:
275 case AGGR_DIE:
275 case AGGR_SOCKET: 276 case AGGR_SOCKET:
276 case AGGR_NONE: 277 case AGGR_NONE:
277 if (!evsel->snapshot) 278 if (!evsel->snapshot)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 2f9c9159a364..7032dd1eeac2 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -44,6 +44,7 @@ enum aggr_mode {
44 AGGR_NONE, 44 AGGR_NONE,
45 AGGR_GLOBAL, 45 AGGR_GLOBAL,
46 AGGR_SOCKET, 46 AGGR_SOCKET,
47 AGGR_DIE,
47 AGGR_CORE, 48 AGGR_CORE,
48 AGGR_THREAD, 49 AGGR_THREAD,
49 AGGR_UNSET, 50 AGGR_UNSET,