aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorJiri Olsa <jolsa@kernel.org>2015-06-26 05:29:27 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2015-06-26 11:05:04 -0400
commit32b8af82e3708bc19af75c31fa773a98449f0edc (patch)
treea8b44deef358b4757f79fa5fd055317af4b7373c /tools
parentd4f63a4741a808c0bf25d92884713008706fca16 (diff)
perf stat: Introduce --per-thread option
Currently the values of all tasks given as PID arguments to the -p option are aggregated and printed as single values. Add the --per-thread option to print values per task.

$ perf stat -e cycles,instructions --per-thread -p 30190,30242
^C
Performance counter stats for process id '30190,30242':
cat-30190 0 cycles
yes-30242 3,842,525,421 cycles
cat-30190 0 instructions
yes-30242 10,370,817,010 instructions
1.143155657 seconds time elapsed

Also works under interval mode:

$ perf stat -e cycles,instructions --per-thread -p 30190,30242 -I 1000
# time comm-pid counts unit events
1.000073435 cat-30190 89,058 cycles
1.000073435 yes-30242 3,360,786,902 cycles (100.00%)
1.000073435 cat-30190 14,066 instructions
1.000073435 yes-30242 9,069,937,462 instructions
2.000204830 cat-30190 0 cycles
2.000204830 yes-30242 3,351,667,626 cycles
2.000204830 cat-30190 0 instructions
2.000204830 yes-30242 9,045,796,885 instructions
^C
2.771286639 cat-30190 0 cycles
2.771286639 yes-30242 2,593,884,166 cycles
2.771286639 cat-30190 0 instructions
2.771286639 yes-30242 7,001,171,191 instructions

It works only with the -t and -p options; otherwise the following error is printed:

$ perf stat -e cycles --per-thread -I 1000 ls
The --per-thread option is only available when monitoring via -p -t options.
-p, --pid <pid> stat events on existing process id
-t, --tid <tid> stat events on existing thread id

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1435310967-14570-23-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Documentation/perf-stat.txt4
-rw-r--r--tools/perf/builtin-stat.c76
-rw-r--r--tools/perf/util/stat.h1
3 files changed, 79 insertions, 2 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04e150d83e7d..47469abdcc1c 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod
144use --per-core in addition to -a. (system-wide). The output includes the 144use --per-core in addition to -a. (system-wide). The output includes the
145core number and the number of online logical processors on that physical processor. 145core number and the number of online logical processors on that physical processor.
146 146
147--per-thread::
148Aggregate counts per monitored thread, when monitoring threads (-t option)
149or processes (-p option).
150
147-D msecs:: 151-D msecs::
148--delay msecs:: 152--delay msecs::
149After starting the program, wait msecs before measuring. This is useful to 153After starting the program, wait msecs before measuring. This is useful to
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 56dc8881cb05..37e301a32f43 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -231,6 +231,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
231 count = &zero; 231 count = &zero;
232 232
233 switch (aggr_mode) { 233 switch (aggr_mode) {
234 case AGGR_THREAD:
234 case AGGR_CORE: 235 case AGGR_CORE:
235 case AGGR_SOCKET: 236 case AGGR_SOCKET:
236 case AGGR_NONE: 237 case AGGR_NONE:
@@ -602,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
602 csv_output ? 0 : -4, 603 csv_output ? 0 : -4,
603 perf_evsel__cpus(evsel)->map[id], csv_sep); 604 perf_evsel__cpus(evsel)->map[id], csv_sep);
604 break; 605 break;
606 case AGGR_THREAD:
607 fprintf(output, "%*s-%*d%s",
608 csv_output ? 0 : 16,
609 thread_map__comm(evsel->threads, id),
610 csv_output ? 0 : -8,
611 thread_map__pid(evsel->threads, id),
612 csv_sep);
613 break;
605 case AGGR_GLOBAL: 614 case AGGR_GLOBAL:
606 default: 615 default:
607 break; 616 break;
@@ -750,6 +759,40 @@ static void print_aggr(char *prefix)
750 } 759 }
751} 760}
752 761
762static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
763{
764 int nthreads = thread_map__nr(counter->threads);
765 int ncpus = cpu_map__nr(counter->cpus);
766 int cpu, thread;
767 double uval;
768
769 for (thread = 0; thread < nthreads; thread++) {
770 u64 ena = 0, run = 0, val = 0;
771
772 for (cpu = 0; cpu < ncpus; cpu++) {
773 val += perf_counts(counter->counts, cpu, thread)->val;
774 ena += perf_counts(counter->counts, cpu, thread)->ena;
775 run += perf_counts(counter->counts, cpu, thread)->run;
776 }
777
778 if (prefix)
779 fprintf(output, "%s", prefix);
780
781 uval = val * counter->scale;
782
783 if (nsec_counter(counter))
784 nsec_printout(thread, 0, counter, uval);
785 else
786 abs_printout(thread, 0, counter, uval);
787
788 if (!csv_output)
789 print_noise(counter, 1.0);
790
791 print_running(run, ena);
792 fputc('\n', output);
793 }
794}
795
753/* 796/*
754 * Print out the results of a single counter: 797 * Print out the results of a single counter:
755 * aggregated counts in system-wide mode 798 * aggregated counts in system-wide mode
@@ -876,6 +919,9 @@ static void print_interval(char *prefix, struct timespec *ts)
876 case AGGR_NONE: 919 case AGGR_NONE:
877 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); 920 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
878 break; 921 break;
922 case AGGR_THREAD:
923 fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit");
924 break;
879 case AGGR_GLOBAL: 925 case AGGR_GLOBAL:
880 default: 926 default:
881 fprintf(output, "# time counts %*s events\n", unit_width, "unit"); 927 fprintf(output, "# time counts %*s events\n", unit_width, "unit");
@@ -944,6 +990,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
944 case AGGR_SOCKET: 990 case AGGR_SOCKET:
945 print_aggr(prefix); 991 print_aggr(prefix);
946 break; 992 break;
993 case AGGR_THREAD:
994 evlist__for_each(evsel_list, counter)
995 print_aggr_thread(counter, prefix);
996 break;
947 case AGGR_GLOBAL: 997 case AGGR_GLOBAL:
948 evlist__for_each(evsel_list, counter) 998 evlist__for_each(evsel_list, counter)
949 print_counter_aggr(counter, prefix); 999 print_counter_aggr(counter, prefix);
@@ -1031,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void)
1031 break; 1081 break;
1032 case AGGR_NONE: 1082 case AGGR_NONE:
1033 case AGGR_GLOBAL: 1083 case AGGR_GLOBAL:
1084 case AGGR_THREAD:
1034 default: 1085 default:
1035 break; 1086 break;
1036 } 1087 }
@@ -1255,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1255 "aggregate counts per processor socket", AGGR_SOCKET), 1306 "aggregate counts per processor socket", AGGR_SOCKET),
1256 OPT_SET_UINT(0, "per-core", &aggr_mode, 1307 OPT_SET_UINT(0, "per-core", &aggr_mode,
1257 "aggregate counts per physical processor core", AGGR_CORE), 1308 "aggregate counts per physical processor core", AGGR_CORE),
1309 OPT_SET_UINT(0, "per-thread", &aggr_mode,
1310 "aggregate counts per thread", AGGR_THREAD),
1258 OPT_UINTEGER('D', "delay", &initial_delay, 1311 OPT_UINTEGER('D', "delay", &initial_delay,
1259 "ms to wait before starting measurement after program start"), 1312 "ms to wait before starting measurement after program start"),
1260 OPT_END() 1313 OPT_END()
@@ -1346,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1346 run_count = 1; 1399 run_count = 1;
1347 } 1400 }
1348 1401
1349 /* no_aggr, cgroup are for system-wide only */ 1402 if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
1350 if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) && 1403 fprintf(stderr, "The --per-thread option is only available "
1404 "when monitoring via -p -t options.\n");
1405 parse_options_usage(NULL, options, "p", 1);
1406 parse_options_usage(NULL, options, "t", 1);
1407 goto out;
1408 }
1409
1410 /*
1411 * no_aggr, cgroup are for system-wide only
1412 * --per-thread is aggregated per thread, we dont mix it with cpu mode
1413 */
1414 if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
1351 !target__has_cpu(&target)) { 1415 !target__has_cpu(&target)) {
1352 fprintf(stderr, "both cgroup and no-aggregation " 1416 fprintf(stderr, "both cgroup and no-aggregation "
1353 "modes only available in system-wide mode\n"); 1417 "modes only available in system-wide mode\n");
@@ -1375,6 +1439,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1375 } 1439 }
1376 goto out; 1440 goto out;
1377 } 1441 }
1442
1443 /*
1444 * Initialize thread_map with comm names,
1445 * so we could print it out on output.
1446 */
1447 if (aggr_mode == AGGR_THREAD)
1448 thread_map__read_comms(evsel_list->threads);
1449
1378 if (interval && interval < 100) { 1450 if (interval && interval < 100) {
1379 pr_err("print interval must be >= 100ms\n"); 1451 pr_err("print interval must be >= 100ms\n");
1380 parse_options_usage(stat_usage, options, "I", 1); 1452 parse_options_usage(stat_usage, options, "I", 1);
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 9f05c571befe..1cfbe0a980ac 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -30,6 +30,7 @@ enum aggr_mode {
30 AGGR_GLOBAL, 30 AGGR_GLOBAL,
31 AGGR_SOCKET, 31 AGGR_SOCKET,
32 AGGR_CORE, 32 AGGR_CORE,
33 AGGR_THREAD,
33}; 34};
34 35
35struct perf_counts_values { 36struct perf_counts_values {