diff options
author | Jiri Olsa <jolsa@kernel.org> | 2015-06-26 05:29:27 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2015-06-26 11:05:04 -0400 |
commit | 32b8af82e3708bc19af75c31fa773a98449f0edc (patch) | |
tree | a8b44deef358b4757f79fa5fd055317af4b7373c /tools | |
parent | d4f63a4741a808c0bf25d92884713008706fca16 (diff) |
perf stat: Introduce --per-thread option
Currently, the counter values of all tasks specified via the -p option
are aggregated and printed as single values.
Add the --per-thread option to print values per task.
$ perf stat -e cycles,instructions --per-thread -p 30190,30242
^C
Performance counter stats for process id '30190,30242':
cat-30190 0 cycles
yes-30242 3,842,525,421 cycles
cat-30190 0 instructions
yes-30242 10,370,817,010 instructions
1.143155657 seconds time elapsed
Also works under interval mode:
$ perf stat -e cycles,instructions --per-thread -p 30190,30242 -I 1000
# time comm-pid counts unit events
1.000073435 cat-30190 89,058 cycles
1.000073435 yes-30242 3,360,786,902 cycles (100.00%)
1.000073435 cat-30190 14,066 instructions
1.000073435 yes-30242 9,069,937,462 instructions
2.000204830 cat-30190 0 cycles
2.000204830 yes-30242 3,351,667,626 cycles
2.000204830 cat-30190 0 instructions
2.000204830 yes-30242 9,045,796,885 instructions
^C 2.771286639 cat-30190 0 cycles
2.771286639 yes-30242 2,593,884,166 cycles
2.771286639 cat-30190 0 instructions
2.771286639 yes-30242 7,001,171,191 instructions
It works only with the -t and -p options; otherwise the following error is
printed:
$ perf stat -e cycles --per-thread -I 1000 ls
The --per-thread option is only available when monitoring via -p -t options.
-p, --pid <pid> stat events on existing process id
-t, --tid <tid> stat events on existing thread id
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1435310967-14570-23-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 4 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 76 | ||||
-rw-r--r-- | tools/perf/util/stat.h | 1 |
3 files changed, 79 insertions, 2 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 04e150d83e7d..47469abdcc1c 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod | |||
144 | use --per-core in addition to -a. (system-wide). The output includes the | 144 | use --per-core in addition to -a. (system-wide). The output includes the |
145 | core number and the number of online logical processors on that physical processor. | 145 | core number and the number of online logical processors on that physical processor. |
146 | 146 | ||
147 | --per-thread:: | ||
148 | Aggregate counts per monitored threads, when monitoring threads (-t option) | ||
149 | or processes (-p option). | ||
150 | |||
147 | -D msecs:: | 151 | -D msecs:: |
148 | --delay msecs:: | 152 | --delay msecs:: |
149 | After starting the program, wait msecs before measuring. This is useful to | 153 | After starting the program, wait msecs before measuring. This is useful to |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 56dc8881cb05..37e301a32f43 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -231,6 +231,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, | |||
231 | count = &zero; | 231 | count = &zero; |
232 | 232 | ||
233 | switch (aggr_mode) { | 233 | switch (aggr_mode) { |
234 | case AGGR_THREAD: | ||
234 | case AGGR_CORE: | 235 | case AGGR_CORE: |
235 | case AGGR_SOCKET: | 236 | case AGGR_SOCKET: |
236 | case AGGR_NONE: | 237 | case AGGR_NONE: |
@@ -602,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) | |||
602 | csv_output ? 0 : -4, | 603 | csv_output ? 0 : -4, |
603 | perf_evsel__cpus(evsel)->map[id], csv_sep); | 604 | perf_evsel__cpus(evsel)->map[id], csv_sep); |
604 | break; | 605 | break; |
606 | case AGGR_THREAD: | ||
607 | fprintf(output, "%*s-%*d%s", | ||
608 | csv_output ? 0 : 16, | ||
609 | thread_map__comm(evsel->threads, id), | ||
610 | csv_output ? 0 : -8, | ||
611 | thread_map__pid(evsel->threads, id), | ||
612 | csv_sep); | ||
613 | break; | ||
605 | case AGGR_GLOBAL: | 614 | case AGGR_GLOBAL: |
606 | default: | 615 | default: |
607 | break; | 616 | break; |
@@ -750,6 +759,40 @@ static void print_aggr(char *prefix) | |||
750 | } | 759 | } |
751 | } | 760 | } |
752 | 761 | ||
762 | static void print_aggr_thread(struct perf_evsel *counter, char *prefix) | ||
763 | { | ||
764 | int nthreads = thread_map__nr(counter->threads); | ||
765 | int ncpus = cpu_map__nr(counter->cpus); | ||
766 | int cpu, thread; | ||
767 | double uval; | ||
768 | |||
769 | for (thread = 0; thread < nthreads; thread++) { | ||
770 | u64 ena = 0, run = 0, val = 0; | ||
771 | |||
772 | for (cpu = 0; cpu < ncpus; cpu++) { | ||
773 | val += perf_counts(counter->counts, cpu, thread)->val; | ||
774 | ena += perf_counts(counter->counts, cpu, thread)->ena; | ||
775 | run += perf_counts(counter->counts, cpu, thread)->run; | ||
776 | } | ||
777 | |||
778 | if (prefix) | ||
779 | fprintf(output, "%s", prefix); | ||
780 | |||
781 | uval = val * counter->scale; | ||
782 | |||
783 | if (nsec_counter(counter)) | ||
784 | nsec_printout(thread, 0, counter, uval); | ||
785 | else | ||
786 | abs_printout(thread, 0, counter, uval); | ||
787 | |||
788 | if (!csv_output) | ||
789 | print_noise(counter, 1.0); | ||
790 | |||
791 | print_running(run, ena); | ||
792 | fputc('\n', output); | ||
793 | } | ||
794 | } | ||
795 | |||
753 | /* | 796 | /* |
754 | * Print out the results of a single counter: | 797 | * Print out the results of a single counter: |
755 | * aggregated counts in system-wide mode | 798 | * aggregated counts in system-wide mode |
@@ -876,6 +919,9 @@ static void print_interval(char *prefix, struct timespec *ts) | |||
876 | case AGGR_NONE: | 919 | case AGGR_NONE: |
877 | fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); | 920 | fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); |
878 | break; | 921 | break; |
922 | case AGGR_THREAD: | ||
923 | fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); | ||
924 | break; | ||
879 | case AGGR_GLOBAL: | 925 | case AGGR_GLOBAL: |
880 | default: | 926 | default: |
881 | fprintf(output, "# time counts %*s events\n", unit_width, "unit"); | 927 | fprintf(output, "# time counts %*s events\n", unit_width, "unit"); |
@@ -944,6 +990,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) | |||
944 | case AGGR_SOCKET: | 990 | case AGGR_SOCKET: |
945 | print_aggr(prefix); | 991 | print_aggr(prefix); |
946 | break; | 992 | break; |
993 | case AGGR_THREAD: | ||
994 | evlist__for_each(evsel_list, counter) | ||
995 | print_aggr_thread(counter, prefix); | ||
996 | break; | ||
947 | case AGGR_GLOBAL: | 997 | case AGGR_GLOBAL: |
948 | evlist__for_each(evsel_list, counter) | 998 | evlist__for_each(evsel_list, counter) |
949 | print_counter_aggr(counter, prefix); | 999 | print_counter_aggr(counter, prefix); |
@@ -1031,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void) | |||
1031 | break; | 1081 | break; |
1032 | case AGGR_NONE: | 1082 | case AGGR_NONE: |
1033 | case AGGR_GLOBAL: | 1083 | case AGGR_GLOBAL: |
1084 | case AGGR_THREAD: | ||
1034 | default: | 1085 | default: |
1035 | break; | 1086 | break; |
1036 | } | 1087 | } |
@@ -1255,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1255 | "aggregate counts per processor socket", AGGR_SOCKET), | 1306 | "aggregate counts per processor socket", AGGR_SOCKET), |
1256 | OPT_SET_UINT(0, "per-core", &aggr_mode, | 1307 | OPT_SET_UINT(0, "per-core", &aggr_mode, |
1257 | "aggregate counts per physical processor core", AGGR_CORE), | 1308 | "aggregate counts per physical processor core", AGGR_CORE), |
1309 | OPT_SET_UINT(0, "per-thread", &aggr_mode, | ||
1310 | "aggregate counts per thread", AGGR_THREAD), | ||
1258 | OPT_UINTEGER('D', "delay", &initial_delay, | 1311 | OPT_UINTEGER('D', "delay", &initial_delay, |
1259 | "ms to wait before starting measurement after program start"), | 1312 | "ms to wait before starting measurement after program start"), |
1260 | OPT_END() | 1313 | OPT_END() |
@@ -1346,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1346 | run_count = 1; | 1399 | run_count = 1; |
1347 | } | 1400 | } |
1348 | 1401 | ||
1349 | /* no_aggr, cgroup are for system-wide only */ | 1402 | if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { |
1350 | if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) && | 1403 | fprintf(stderr, "The --per-thread option is only available " |
1404 | "when monitoring via -p -t options.\n"); | ||
1405 | parse_options_usage(NULL, options, "p", 1); | ||
1406 | parse_options_usage(NULL, options, "t", 1); | ||
1407 | goto out; | ||
1408 | } | ||
1409 | |||
1410 | /* | ||
1411 | * no_aggr, cgroup are for system-wide only | ||
1412 | * --per-thread is aggregated per thread, we dont mix it with cpu mode | ||
1413 | */ | ||
1414 | if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && | ||
1351 | !target__has_cpu(&target)) { | 1415 | !target__has_cpu(&target)) { |
1352 | fprintf(stderr, "both cgroup and no-aggregation " | 1416 | fprintf(stderr, "both cgroup and no-aggregation " |
1353 | "modes only available in system-wide mode\n"); | 1417 | "modes only available in system-wide mode\n"); |
@@ -1375,6 +1439,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1375 | } | 1439 | } |
1376 | goto out; | 1440 | goto out; |
1377 | } | 1441 | } |
1442 | |||
1443 | /* | ||
1444 | * Initialize thread_map with comm names, | ||
1445 | * so we could print it out on output. | ||
1446 | */ | ||
1447 | if (aggr_mode == AGGR_THREAD) | ||
1448 | thread_map__read_comms(evsel_list->threads); | ||
1449 | |||
1378 | if (interval && interval < 100) { | 1450 | if (interval && interval < 100) { |
1379 | pr_err("print interval must be >= 100ms\n"); | 1451 | pr_err("print interval must be >= 100ms\n"); |
1380 | parse_options_usage(stat_usage, options, "I", 1); | 1452 | parse_options_usage(stat_usage, options, "I", 1); |
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 9f05c571befe..1cfbe0a980ac 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h | |||
@@ -30,6 +30,7 @@ enum aggr_mode { | |||
30 | AGGR_GLOBAL, | 30 | AGGR_GLOBAL, |
31 | AGGR_SOCKET, | 31 | AGGR_SOCKET, |
32 | AGGR_CORE, | 32 | AGGR_CORE, |
33 | AGGR_THREAD, | ||
33 | }; | 34 | }; |
34 | 35 | ||
35 | struct perf_counts_values { | 36 | struct perf_counts_values { |