diff options
Diffstat (limited to 'tools/perf')
| -rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 4 | ||||
| -rw-r--r-- | tools/perf/builtin-stat.c | 76 | ||||
| -rw-r--r-- | tools/perf/util/stat.h | 1 |
3 files changed, 79 insertions, 2 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 04e150d83e7d..47469abdcc1c 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
| @@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod | |||
| 144 | use --per-core in addition to -a. (system-wide). The output includes the | 144 | use --per-core in addition to -a. (system-wide). The output includes the |
| 145 | core number and the number of online logical processors on that physical processor. | 145 | core number and the number of online logical processors on that physical processor. |
| 146 | 146 | ||
| 147 | --per-thread:: | ||
| 148 | Aggregate counts per monitored thread, when monitoring threads (-t option) | ||
| 149 | or processes (-p option). | ||
| 150 | |||
| 147 | -D msecs:: | 151 | -D msecs:: |
| 148 | --delay msecs:: | 152 | --delay msecs:: |
| 149 | After starting the program, wait msecs before measuring. This is useful to | 153 | After starting the program, wait msecs before measuring. This is useful to |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 56dc8881cb05..37e301a32f43 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
| @@ -231,6 +231,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, | |||
| 231 | count = &zero; | 231 | count = &zero; |
| 232 | 232 | ||
| 233 | switch (aggr_mode) { | 233 | switch (aggr_mode) { |
| 234 | case AGGR_THREAD: | ||
| 234 | case AGGR_CORE: | 235 | case AGGR_CORE: |
| 235 | case AGGR_SOCKET: | 236 | case AGGR_SOCKET: |
| 236 | case AGGR_NONE: | 237 | case AGGR_NONE: |
| @@ -602,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) | |||
| 602 | csv_output ? 0 : -4, | 603 | csv_output ? 0 : -4, |
| 603 | perf_evsel__cpus(evsel)->map[id], csv_sep); | 604 | perf_evsel__cpus(evsel)->map[id], csv_sep); |
| 604 | break; | 605 | break; |
| 606 | case AGGR_THREAD: | ||
| 607 | fprintf(output, "%*s-%*d%s", | ||
| 608 | csv_output ? 0 : 16, | ||
| 609 | thread_map__comm(evsel->threads, id), | ||
| 610 | csv_output ? 0 : -8, | ||
| 611 | thread_map__pid(evsel->threads, id), | ||
| 612 | csv_sep); | ||
| 613 | break; | ||
| 605 | case AGGR_GLOBAL: | 614 | case AGGR_GLOBAL: |
| 606 | default: | 615 | default: |
| 607 | break; | 616 | break; |
| @@ -750,6 +759,40 @@ static void print_aggr(char *prefix) | |||
| 750 | } | 759 | } |
| 751 | } | 760 | } |
| 752 | 761 | ||
| 762 | static void print_aggr_thread(struct perf_evsel *counter, char *prefix) | ||
| 763 | { | ||
| 764 | int nthreads = thread_map__nr(counter->threads); | ||
| 765 | int ncpus = cpu_map__nr(counter->cpus); | ||
| 766 | int cpu, thread; | ||
| 767 | double uval; | ||
| 768 | |||
| 769 | for (thread = 0; thread < nthreads; thread++) { | ||
| 770 | u64 ena = 0, run = 0, val = 0; | ||
| 771 | |||
| 772 | for (cpu = 0; cpu < ncpus; cpu++) { | ||
| 773 | val += perf_counts(counter->counts, cpu, thread)->val; | ||
| 774 | ena += perf_counts(counter->counts, cpu, thread)->ena; | ||
| 775 | run += perf_counts(counter->counts, cpu, thread)->run; | ||
| 776 | } | ||
| 777 | |||
| 778 | if (prefix) | ||
| 779 | fprintf(output, "%s", prefix); | ||
| 780 | |||
| 781 | uval = val * counter->scale; | ||
| 782 | |||
| 783 | if (nsec_counter(counter)) | ||
| 784 | nsec_printout(thread, 0, counter, uval); | ||
| 785 | else | ||
| 786 | abs_printout(thread, 0, counter, uval); | ||
| 787 | |||
| 788 | if (!csv_output) | ||
| 789 | print_noise(counter, 1.0); | ||
| 790 | |||
| 791 | print_running(run, ena); | ||
| 792 | fputc('\n', output); | ||
| 793 | } | ||
| 794 | } | ||
| 795 | |||
| 753 | /* | 796 | /* |
| 754 | * Print out the results of a single counter: | 797 | * Print out the results of a single counter: |
| 755 | * aggregated counts in system-wide mode | 798 | * aggregated counts in system-wide mode |
| @@ -876,6 +919,9 @@ static void print_interval(char *prefix, struct timespec *ts) | |||
| 876 | case AGGR_NONE: | 919 | case AGGR_NONE: |
| 877 | fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); | 920 | fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); |
| 878 | break; | 921 | break; |
| 922 | case AGGR_THREAD: | ||
| 923 | fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); | ||
| 924 | break; | ||
| 879 | case AGGR_GLOBAL: | 925 | case AGGR_GLOBAL: |
| 880 | default: | 926 | default: |
| 881 | fprintf(output, "# time counts %*s events\n", unit_width, "unit"); | 927 | fprintf(output, "# time counts %*s events\n", unit_width, "unit"); |
| @@ -944,6 +990,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) | |||
| 944 | case AGGR_SOCKET: | 990 | case AGGR_SOCKET: |
| 945 | print_aggr(prefix); | 991 | print_aggr(prefix); |
| 946 | break; | 992 | break; |
| 993 | case AGGR_THREAD: | ||
| 994 | evlist__for_each(evsel_list, counter) | ||
| 995 | print_aggr_thread(counter, prefix); | ||
| 996 | break; | ||
| 947 | case AGGR_GLOBAL: | 997 | case AGGR_GLOBAL: |
| 948 | evlist__for_each(evsel_list, counter) | 998 | evlist__for_each(evsel_list, counter) |
| 949 | print_counter_aggr(counter, prefix); | 999 | print_counter_aggr(counter, prefix); |
| @@ -1031,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void) | |||
| 1031 | break; | 1081 | break; |
| 1032 | case AGGR_NONE: | 1082 | case AGGR_NONE: |
| 1033 | case AGGR_GLOBAL: | 1083 | case AGGR_GLOBAL: |
| 1084 | case AGGR_THREAD: | ||
| 1034 | default: | 1085 | default: |
| 1035 | break; | 1086 | break; |
| 1036 | } | 1087 | } |
| @@ -1255,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 1255 | "aggregate counts per processor socket", AGGR_SOCKET), | 1306 | "aggregate counts per processor socket", AGGR_SOCKET), |
| 1256 | OPT_SET_UINT(0, "per-core", &aggr_mode, | 1307 | OPT_SET_UINT(0, "per-core", &aggr_mode, |
| 1257 | "aggregate counts per physical processor core", AGGR_CORE), | 1308 | "aggregate counts per physical processor core", AGGR_CORE), |
| 1309 | OPT_SET_UINT(0, "per-thread", &aggr_mode, | ||
| 1310 | "aggregate counts per thread", AGGR_THREAD), | ||
| 1258 | OPT_UINTEGER('D', "delay", &initial_delay, | 1311 | OPT_UINTEGER('D', "delay", &initial_delay, |
| 1259 | "ms to wait before starting measurement after program start"), | 1312 | "ms to wait before starting measurement after program start"), |
| 1260 | OPT_END() | 1313 | OPT_END() |
| @@ -1346,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 1346 | run_count = 1; | 1399 | run_count = 1; |
| 1347 | } | 1400 | } |
| 1348 | 1401 | ||
| 1349 | /* no_aggr, cgroup are for system-wide only */ | 1402 | if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { |
| 1350 | if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) && | 1403 | fprintf(stderr, "The --per-thread option is only available " |
| 1404 | "when monitoring via -p -t options.\n"); | ||
| 1405 | parse_options_usage(NULL, options, "p", 1); | ||
| 1406 | parse_options_usage(NULL, options, "t", 1); | ||
| 1407 | goto out; | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | /* | ||
| 1411 | * no_aggr, cgroup are for system-wide only | ||
| 1412 | * --per-thread is aggregated per thread, we don't mix it with cpu mode | ||
| 1413 | */ | ||
| 1414 | if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && | ||
| 1351 | !target__has_cpu(&target)) { | 1415 | !target__has_cpu(&target)) { |
| 1352 | fprintf(stderr, "both cgroup and no-aggregation " | 1416 | fprintf(stderr, "both cgroup and no-aggregation " |
| 1353 | "modes only available in system-wide mode\n"); | 1417 | "modes only available in system-wide mode\n"); |
| @@ -1375,6 +1439,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 1375 | } | 1439 | } |
| 1376 | goto out; | 1440 | goto out; |
| 1377 | } | 1441 | } |
| 1442 | |||
| 1443 | /* | ||
| 1444 | * Initialize thread_map with comm names, | ||
| 1445 | * so we could print it out on output. | ||
| 1446 | */ | ||
| 1447 | if (aggr_mode == AGGR_THREAD) | ||
| 1448 | thread_map__read_comms(evsel_list->threads); | ||
| 1449 | |||
| 1378 | if (interval && interval < 100) { | 1450 | if (interval && interval < 100) { |
| 1379 | pr_err("print interval must be >= 100ms\n"); | 1451 | pr_err("print interval must be >= 100ms\n"); |
| 1380 | parse_options_usage(stat_usage, options, "I", 1); | 1452 | parse_options_usage(stat_usage, options, "I", 1); |
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 9f05c571befe..1cfbe0a980ac 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h | |||
| @@ -30,6 +30,7 @@ enum aggr_mode { | |||
| 30 | AGGR_GLOBAL, | 30 | AGGR_GLOBAL, |
| 31 | AGGR_SOCKET, | 31 | AGGR_SOCKET, |
| 32 | AGGR_CORE, | 32 | AGGR_CORE, |
| 33 | AGGR_THREAD, | ||
| 33 | }; | 34 | }; |
| 34 | 35 | ||
| 35 | struct perf_counts_values { | 36 | struct perf_counts_values { |
