aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorStephane Eranian <eranian@gmail.com>2013-02-14 07:57:29 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2013-03-25 15:13:26 -0400
commit12c08a9f591aeda57fb3b05897169e7da5439a79 (patch)
tree4301499861ad5167139beaf6596b721badebe119 /tools
parentd4304958a25414a6e67b8a41c0f230e05cafafb6 (diff)
perf stat: Add per-core aggregation
This patch adds the --per-core option to perf stat. This option is used to aggregate system-wide counts on a per physical core basis. On processors with hyperthreading, this means counts of all HT threads running on a physical core are aggregated. This mode is useful to find imbalance between physical cores running a uniform workload. Cores are identified by socket: S0-C1 means physical core 1 on socket 0. Note that cores are identified using their physical core id, thus their numbering may not be continuous. Per core aggregation can be combined with interval printing: # perf stat -a --per-core -I 1000 -e cycles sleep 1000 # time core cpus counts events 1.000090030 S0-C0 1 4,765,747 cycles 1.000090030 S0-C1 1 5,580,647 cycles 1.000090030 S0-C2 1 221,181 cycles 1.000090030 S0-C3 1 266,092 cycles Signed-off-by: Stephane Eranian <eranian@google.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung.kim@lge.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1360846649-6411-4-git-send-email-eranian@google.com [ committer note: Remove parts already applied on 86ee6e1 to keep bisectability ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Documentation/perf-stat.txt6
-rw-r--r--tools/perf/builtin-stat.c31
-rw-r--r--tools/perf/util/cpumap.c46
-rw-r--r--tools/perf/util/cpumap.h12
4 files changed, 92 insertions, 3 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 46027e1aedb7..2fe87fb558f0 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -126,6 +126,12 @@ use --per-socket in addition to -a. (system-wide). The output includes the
126socket number and the number of online processors on that socket. This is 126socket number and the number of online processors on that socket. This is
127useful to gauge the amount of aggregation. 127useful to gauge the amount of aggregation.
128 128
129--per-core::
130Aggregate counts per physical processor for system-wide mode measurements. This
131is a useful mode to detect imbalance between physical cores. To enable this mode,
132use --per-core in addition to -a. (system-wide). The output includes the
133core number and the number of online logical processors on that physical processor.
134
129EXAMPLES 135EXAMPLES
130-------- 136--------
131 137
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6f6ea931ab76..7e910bab1097 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -80,6 +80,7 @@ enum aggr_mode {
80 AGGR_NONE, 80 AGGR_NONE,
81 AGGR_GLOBAL, 81 AGGR_GLOBAL,
82 AGGR_SOCKET, 82 AGGR_SOCKET,
83 AGGR_CORE,
83}; 84};
84 85
85static int run_count = 1; 86static int run_count = 1;
@@ -384,6 +385,9 @@ static void print_interval(void)
384 case AGGR_SOCKET: 385 case AGGR_SOCKET:
385 fprintf(output, "# time socket cpus counts events\n"); 386 fprintf(output, "# time socket cpus counts events\n");
386 break; 387 break;
388 case AGGR_CORE:
389 fprintf(output, "# time core cpus counts events\n");
390 break;
387 case AGGR_NONE: 391 case AGGR_NONE:
388 fprintf(output, "# time CPU counts events\n"); 392 fprintf(output, "# time CPU counts events\n");
389 break; 393 break;
@@ -397,6 +401,7 @@ static void print_interval(void)
397 num_print_interval = 0; 401 num_print_interval = 0;
398 402
399 switch (aggr_mode) { 403 switch (aggr_mode) {
404 case AGGR_CORE:
400 case AGGR_SOCKET: 405 case AGGR_SOCKET:
401 print_aggr(prefix); 406 print_aggr(prefix);
402 break; 407 break;
@@ -566,13 +571,23 @@ static void print_noise(struct perf_evsel *evsel, double avg)
566 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 571 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
567} 572}
568 573
569static void aggr_printout(struct perf_evsel *evsel, int cpu, int nr) 574static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
570{ 575{
571 switch (aggr_mode) { 576 switch (aggr_mode) {
577 case AGGR_CORE:
578 fprintf(output, "S%d-C%*d%s%*d%s",
579 cpu_map__id_to_socket(id),
580 csv_output ? 0 : -8,
581 cpu_map__id_to_cpu(id),
582 csv_sep,
583 csv_output ? 0 : 4,
584 nr,
585 csv_sep);
586 break;
572 case AGGR_SOCKET: 587 case AGGR_SOCKET:
573 fprintf(output, "S%*d%s%*d%s", 588 fprintf(output, "S%*d%s%*d%s",
574 csv_output ? 0 : -5, 589 csv_output ? 0 : -5,
575 cpu, 590 id,
576 csv_sep, 591 csv_sep,
577 csv_output ? 0 : 4, 592 csv_output ? 0 : 4,
578 nr, 593 nr,
@@ -581,7 +596,7 @@ static void aggr_printout(struct perf_evsel *evsel, int cpu, int nr)
581 case AGGR_NONE: 596 case AGGR_NONE:
582 fprintf(output, "CPU%*d%s", 597 fprintf(output, "CPU%*d%s",
583 csv_output ? 0 : -4, 598 csv_output ? 0 : -4,
584 perf_evsel__cpus(evsel)->map[cpu], csv_sep); 599 perf_evsel__cpus(evsel)->map[id], csv_sep);
585 break; 600 break;
586 case AGGR_GLOBAL: 601 case AGGR_GLOBAL:
587 default: 602 default:
@@ -1095,6 +1110,7 @@ static void print_stat(int argc, const char **argv)
1095 } 1110 }
1096 1111
1097 switch (aggr_mode) { 1112 switch (aggr_mode) {
1113 case AGGR_CORE:
1098 case AGGR_SOCKET: 1114 case AGGR_SOCKET:
1099 print_aggr(NULL); 1115 print_aggr(NULL);
1100 break; 1116 break;
@@ -1163,6 +1179,13 @@ static int perf_stat_init_aggr_mode(void)
1163 } 1179 }
1164 aggr_get_id = cpu_map__get_socket; 1180 aggr_get_id = cpu_map__get_socket;
1165 break; 1181 break;
1182 case AGGR_CORE:
1183 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1184 perror("cannot build core map");
1185 return -1;
1186 }
1187 aggr_get_id = cpu_map__get_core;
1188 break;
1166 case AGGR_NONE: 1189 case AGGR_NONE:
1167 case AGGR_GLOBAL: 1190 case AGGR_GLOBAL:
1168 default: 1191 default:
@@ -1372,6 +1395,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1372 "print counts at regular interval in ms (>= 100)"), 1395 "print counts at regular interval in ms (>= 100)"),
1373 OPT_SET_UINT(0, "per-socket", &aggr_mode, 1396 OPT_SET_UINT(0, "per-socket", &aggr_mode,
1374 "aggregate counts per processor socket", AGGR_SOCKET), 1397 "aggregate counts per processor socket", AGGR_SOCKET),
1398 OPT_SET_UINT(0, "per-core", &aggr_mode,
1399 "aggregate counts per physical processor core", AGGR_CORE),
1375 OPT_END() 1400 OPT_END()
1376 }; 1401 };
1377 const char * const stat_usage[] = { 1402 const char * const stat_usage[] = {
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 7bb8e87a5847..beb8cf9f9976 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -267,7 +267,53 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
267 return 0; 267 return 0;
268} 268}
269 269
270int cpu_map__get_core(struct cpu_map *map, int idx)
271{
272 FILE *fp;
273 const char *mnt;
274 char path[PATH_MAX];
275 int cpu, ret, s;
276
277 if (idx > map->nr)
278 return -1;
279
280 cpu = map->map[idx];
281
282 mnt = sysfs_find_mountpoint();
283 if (!mnt)
284 return -1;
285
286 snprintf(path, PATH_MAX,
287 "%s/devices/system/cpu/cpu%d/topology/core_id",
288 mnt, cpu);
289
290 fp = fopen(path, "r");
291 if (!fp)
292 return -1;
293 ret = fscanf(fp, "%d", &cpu);
294 fclose(fp);
295 if (ret != 1)
296 return -1;
297
298 s = cpu_map__get_socket(map, idx);
299 if (s == -1)
300 return -1;
301
302 /*
303 * encode socket in upper 16 bits
304 * core_id is relative to socket, and
305 * we need a global id. So we combine
306 * socket+ core id
307 */
308 return (s << 16) | (cpu & 0xffff);
309}
310
270int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) 311int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
271{ 312{
272 return cpu_map__build_map(cpus, sockp, cpu_map__get_socket); 313 return cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
273} 314}
315
316int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
317{
318 return cpu_map__build_map(cpus, corep, cpu_map__get_core);
319}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 161b00756a12..9bed02e5fb3d 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -15,7 +15,9 @@ void cpu_map__delete(struct cpu_map *map);
15struct cpu_map *cpu_map__read(FILE *file); 15struct cpu_map *cpu_map__read(FILE *file);
16size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); 16size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
17int cpu_map__get_socket(struct cpu_map *map, int idx); 17int cpu_map__get_socket(struct cpu_map *map, int idx);
18int cpu_map__get_core(struct cpu_map *map, int idx);
18int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); 19int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
20int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
19 21
20static inline int cpu_map__socket(struct cpu_map *sock, int s) 22static inline int cpu_map__socket(struct cpu_map *sock, int s)
21{ 23{
@@ -24,6 +26,16 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s)
24 return sock->map[s]; 26 return sock->map[s];
25} 27}
26 28
/*
 * cpu_map__id_to_socket - extract the socket part of a combined id.
 * @id: value whose bits above the low 16 carry the socket number
 *
 * Returns @id shifted right by 16 bits.
 */
static inline int cpu_map__id_to_socket(int id)
{
	const int socket = id >> 16;

	return socket;
}
33
/*
 * cpu_map__id_to_cpu - extract the low 16 bits of a combined id.
 * @id: combined value; everything above bit 15 is discarded
 *
 * Returns @id masked with 0xffff.
 */
static inline int cpu_map__id_to_cpu(int id)
{
	const int low_mask = 0xffff;

	return id & low_mask;
}
38
27static inline int cpu_map__nr(const struct cpu_map *map) 39static inline int cpu_map__nr(const struct cpu_map *map)
28{ 40{
29 return map ? map->nr : 1; 41 return map ? map->nr : 1;