aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@kernel.org> 2015-06-30 00:47:32 -0400
committer: Ingo Molnar <mingo@kernel.org> 2015-06-30 00:47:58 -0400
commit: 2d6dac2fcc796a9a2917d69bcab66f6b157fe51b (patch)
tree: ca03e7f2fa83e2401af8f1c773c297baae2115b7
parent: 6eedf416429a32e0216f61b8b690d25577b2b91e (diff)
parent: 36c8bb56a9f718a9a5f35d1834ca9dcec95deb4a (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - Validate syscall list passed via -e argument to 'perf trace'. (Arnaldo Carvalho de Melo)

  - Introduce 'perf stat --per-thread'. (Jiri Olsa)

  - Check access permission for --kallsyms and --vmlinux. (Li Zhang)

Infrastructure changes:

  - Move stuff out of 'perf stat' and into the lib for further use. (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- tools/perf/Documentation/perf-stat.txt 4
-rw-r--r-- tools/perf/builtin-report.c 11
-rw-r--r-- tools/perf/builtin-stat.c 402
-rw-r--r-- tools/perf/builtin-trace.c 32
-rw-r--r-- tools/perf/tests/Build 1
-rw-r--r-- tools/perf/tests/builtin-test.c 4
-rw-r--r-- tools/perf/tests/openat-syscall-all-cpus.c 6
-rw-r--r-- tools/perf/tests/openat-syscall.c 4
-rw-r--r-- tools/perf/tests/tests.h 1
-rw-r--r-- tools/perf/tests/thread-map.c 38
-rw-r--r-- tools/perf/util/evlist.h 1
-rw-r--r-- tools/perf/util/evsel.c 24
-rw-r--r-- tools/perf/util/evsel.h 28
-rw-r--r-- tools/perf/util/python-ext-sources 1
-rw-r--r-- tools/perf/util/stat.c 132
-rw-r--r-- tools/perf/util/stat.h 47
-rw-r--r-- tools/perf/util/symbol.c 5
-rw-r--r-- tools/perf/util/thread_map.c 76
-rw-r--r-- tools/perf/util/thread_map.h 8
19 files changed, 570 insertions, 255 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04e150d83e7d..47469abdcc1c 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod
144use --per-core in addition to -a. (system-wide). The output includes the 144use --per-core in addition to -a. (system-wide). The output includes the
145core number and the number of online logical processors on that physical processor. 145core number and the number of online logical processors on that physical processor.
146 146
147--per-thread::
148Aggregate counts per monitored threads, when monitoring threads (-t option)
149or processes (-p option).
150
147-D msecs:: 151-D msecs::
148--delay msecs:: 152--delay msecs::
149After starting the program, wait msecs before measuring. This is useful to 153After starting the program, wait msecs before measuring. This is useful to
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 32626ea3e227..348bed4a2abf 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -742,6 +742,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
742 742
743 argc = parse_options(argc, argv, options, report_usage, 0); 743 argc = parse_options(argc, argv, options, report_usage, 0);
744 744
745 if (symbol_conf.vmlinux_name &&
746 access(symbol_conf.vmlinux_name, R_OK)) {
747 pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name);
748 return -EINVAL;
749 }
750 if (symbol_conf.kallsyms_name &&
751 access(symbol_conf.kallsyms_name, R_OK)) {
752 pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name);
753 return -EINVAL;
754 }
755
745 if (report.use_stdio) 756 if (report.use_stdio)
746 use_browser = 0; 757 use_browser = 0;
747 else if (report.use_tui) 758 else if (report.use_tui)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3e1636cae76b..37e301a32f43 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -67,10 +67,7 @@
67#define CNTR_NOT_SUPPORTED "<not supported>" 67#define CNTR_NOT_SUPPORTED "<not supported>"
68#define CNTR_NOT_COUNTED "<not counted>" 68#define CNTR_NOT_COUNTED "<not counted>"
69 69
70static void print_stat(int argc, const char **argv); 70static void print_counters(struct timespec *ts, int argc, const char **argv);
71static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
72static void print_counter(struct perf_evsel *counter, char *prefix);
73static void print_aggr(char *prefix);
74 71
75/* Default events used for perf stat -T */ 72/* Default events used for perf stat -T */
76static const char *transaction_attrs = { 73static const char *transaction_attrs = {
@@ -141,86 +138,9 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a,
141 } 138 }
142} 139}
143 140
144static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) 141static void perf_stat__reset_stats(void)
145{ 142{
146 int i; 143 perf_evlist__reset_stats(evsel_list);
147 struct perf_stat *ps = evsel->priv;
148
149 for (i = 0; i < 3; i++)
150 init_stats(&ps->res_stats[i]);
151
152 perf_stat_evsel_id_init(evsel);
153}
154
155static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
156{
157 evsel->priv = zalloc(sizeof(struct perf_stat));
158 if (evsel->priv == NULL)
159 return -ENOMEM;
160 perf_evsel__reset_stat_priv(evsel);
161 return 0;
162}
163
164static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
165{
166 zfree(&evsel->priv);
167}
168
169static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
170{
171 struct perf_counts *counts;
172
173 counts = perf_counts__new(perf_evsel__nr_cpus(evsel));
174 if (counts)
175 evsel->prev_raw_counts = counts;
176
177 return counts ? 0 : -ENOMEM;
178}
179
180static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
181{
182 perf_counts__delete(evsel->prev_raw_counts);
183 evsel->prev_raw_counts = NULL;
184}
185
186static void perf_evlist__free_stats(struct perf_evlist *evlist)
187{
188 struct perf_evsel *evsel;
189
190 evlist__for_each(evlist, evsel) {
191 perf_evsel__free_stat_priv(evsel);
192 perf_evsel__free_counts(evsel);
193 perf_evsel__free_prev_raw_counts(evsel);
194 }
195}
196
197static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
198{
199 struct perf_evsel *evsel;
200
201 evlist__for_each(evlist, evsel) {
202 if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
203 perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
204 (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
205 goto out_free;
206 }
207
208 return 0;
209
210out_free:
211 perf_evlist__free_stats(evlist);
212 return -1;
213}
214
215static void perf_stat__reset_stats(struct perf_evlist *evlist)
216{
217 struct perf_evsel *evsel;
218
219 evlist__for_each(evlist, evsel) {
220 perf_evsel__reset_stat_priv(evsel);
221 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
222 }
223
224 perf_stat__reset_shadow_stats(); 144 perf_stat__reset_shadow_stats();
225} 145}
226 146
@@ -294,8 +214,9 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
294 return 0; 214 return 0;
295} 215}
296 216
297static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, 217static int
298 struct perf_counts_values *count) 218process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
219 struct perf_counts_values *count)
299{ 220{
300 struct perf_counts_values *aggr = &evsel->counts->aggr; 221 struct perf_counts_values *aggr = &evsel->counts->aggr;
301 static struct perf_counts_values zero; 222 static struct perf_counts_values zero;
@@ -310,13 +231,13 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
310 count = &zero; 231 count = &zero;
311 232
312 switch (aggr_mode) { 233 switch (aggr_mode) {
234 case AGGR_THREAD:
313 case AGGR_CORE: 235 case AGGR_CORE:
314 case AGGR_SOCKET: 236 case AGGR_SOCKET:
315 case AGGR_NONE: 237 case AGGR_NONE:
316 if (!evsel->snapshot) 238 if (!evsel->snapshot)
317 perf_evsel__compute_deltas(evsel, cpu, count); 239 perf_evsel__compute_deltas(evsel, cpu, thread, count);
318 perf_counts_values__scale(count, scale, NULL); 240 perf_counts_values__scale(count, scale, NULL);
319 evsel->counts->cpu[cpu] = *count;
320 if (aggr_mode == AGGR_NONE) 241 if (aggr_mode == AGGR_NONE)
321 perf_stat__update_shadow_stats(evsel, count->values, cpu); 242 perf_stat__update_shadow_stats(evsel, count->values, cpu);
322 break; 243 break;
@@ -333,26 +254,48 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
333 return 0; 254 return 0;
334} 255}
335 256
336static int read_counter(struct perf_evsel *counter); 257static int process_counter_maps(struct perf_evsel *counter)
258{
259 int nthreads = thread_map__nr(counter->threads);
260 int ncpus = perf_evsel__nr_cpus(counter);
261 int cpu, thread;
337 262
338/* 263 if (counter->system_wide)
339 * Read out the results of a single counter: 264 nthreads = 1;
340 * aggregate counts across CPUs in system-wide mode 265
341 */ 266 for (thread = 0; thread < nthreads; thread++) {
342static int read_counter_aggr(struct perf_evsel *counter) 267 for (cpu = 0; cpu < ncpus; cpu++) {
268 if (process_counter_values(counter, cpu, thread,
269 perf_counts(counter->counts, cpu, thread)))
270 return -1;
271 }
272 }
273
274 return 0;
275}
276
277static int process_counter(struct perf_evsel *counter)
343{ 278{
344 struct perf_counts_values *aggr = &counter->counts->aggr; 279 struct perf_counts_values *aggr = &counter->counts->aggr;
345 struct perf_stat *ps = counter->priv; 280 struct perf_stat *ps = counter->priv;
346 u64 *count = counter->counts->aggr.values; 281 u64 *count = counter->counts->aggr.values;
347 int i; 282 int i, ret;
348 283
349 aggr->val = aggr->ena = aggr->run = 0; 284 aggr->val = aggr->ena = aggr->run = 0;
285 init_stats(ps->res_stats);
350 286
351 if (read_counter(counter)) 287 if (counter->per_pkg)
352 return -1; 288 zero_per_pkg(counter);
289
290 ret = process_counter_maps(counter);
291 if (ret)
292 return ret;
293
294 if (aggr_mode != AGGR_GLOBAL)
295 return 0;
353 296
354 if (!counter->snapshot) 297 if (!counter->snapshot)
355 perf_evsel__compute_deltas(counter, -1, aggr); 298 perf_evsel__compute_deltas(counter, -1, -1, aggr);
356 perf_counts_values__scale(aggr, scale, &counter->counts->scaled); 299 perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
357 300
358 for (i = 0; i < 3; i++) 301 for (i = 0; i < 3; i++)
@@ -387,12 +330,12 @@ static int read_counter(struct perf_evsel *counter)
387 if (counter->system_wide) 330 if (counter->system_wide)
388 nthreads = 1; 331 nthreads = 1;
389 332
390 if (counter->per_pkg)
391 zero_per_pkg(counter);
392
393 for (thread = 0; thread < nthreads; thread++) { 333 for (thread = 0; thread < nthreads; thread++) {
394 for (cpu = 0; cpu < ncpus; cpu++) { 334 for (cpu = 0; cpu < ncpus; cpu++) {
395 if (perf_evsel__read_cb(counter, cpu, thread, read_cb)) 335 struct perf_counts_values *count;
336
337 count = perf_counts(counter->counts, cpu, thread);
338 if (perf_evsel__read(counter, cpu, thread, count))
396 return -1; 339 return -1;
397 } 340 }
398 } 341 }
@@ -400,68 +343,34 @@ static int read_counter(struct perf_evsel *counter)
400 return 0; 343 return 0;
401} 344}
402 345
403static void print_interval(void) 346static void read_counters(bool close)
404{ 347{
405 static int num_print_interval;
406 struct perf_evsel *counter; 348 struct perf_evsel *counter;
407 struct perf_stat *ps;
408 struct timespec ts, rs;
409 char prefix[64];
410 349
411 if (aggr_mode == AGGR_GLOBAL) { 350 evlist__for_each(evsel_list, counter) {
412 evlist__for_each(evsel_list, counter) { 351 if (read_counter(counter))
413 ps = counter->priv; 352 pr_warning("failed to read counter %s\n", counter->name);
414 memset(ps->res_stats, 0, sizeof(ps->res_stats));
415 read_counter_aggr(counter);
416 }
417 } else {
418 evlist__for_each(evsel_list, counter) {
419 ps = counter->priv;
420 memset(ps->res_stats, 0, sizeof(ps->res_stats));
421 read_counter(counter);
422 }
423 }
424 353
425 clock_gettime(CLOCK_MONOTONIC, &ts); 354 if (process_counter(counter))
426 diff_timespec(&rs, &ts, &ref_time); 355 pr_warning("failed to process counter %s\n", counter->name);
427 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
428 356
429 if (num_print_interval == 0 && !csv_output) { 357 if (close) {
430 switch (aggr_mode) { 358 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
431 case AGGR_SOCKET: 359 thread_map__nr(evsel_list->threads));
432 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
433 break;
434 case AGGR_CORE:
435 fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit");
436 break;
437 case AGGR_NONE:
438 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
439 break;
440 case AGGR_GLOBAL:
441 default:
442 fprintf(output, "# time counts %*s events\n", unit_width, "unit");
443 } 360 }
444 } 361 }
362}
445 363
446 if (++num_print_interval == 25) 364static void process_interval(void)
447 num_print_interval = 0; 365{
366 struct timespec ts, rs;
448 367
449 switch (aggr_mode) { 368 read_counters(false);
450 case AGGR_CORE:
451 case AGGR_SOCKET:
452 print_aggr(prefix);
453 break;
454 case AGGR_NONE:
455 evlist__for_each(evsel_list, counter)
456 print_counter(counter, prefix);
457 break;
458 case AGGR_GLOBAL:
459 default:
460 evlist__for_each(evsel_list, counter)
461 print_counter_aggr(counter, prefix);
462 }
463 369
464 fflush(output); 370 clock_gettime(CLOCK_MONOTONIC, &ts);
371 diff_timespec(&rs, &ts, &ref_time);
372
373 print_counters(&rs, 0, NULL);
465} 374}
466 375
467static void handle_initial_delay(void) 376static void handle_initial_delay(void)
@@ -576,7 +485,7 @@ static int __run_perf_stat(int argc, const char **argv)
576 if (interval) { 485 if (interval) {
577 while (!waitpid(child_pid, &status, WNOHANG)) { 486 while (!waitpid(child_pid, &status, WNOHANG)) {
578 nanosleep(&ts, NULL); 487 nanosleep(&ts, NULL);
579 print_interval(); 488 process_interval();
580 } 489 }
581 } 490 }
582 wait(&status); 491 wait(&status);
@@ -594,7 +503,7 @@ static int __run_perf_stat(int argc, const char **argv)
594 while (!done) { 503 while (!done) {
595 nanosleep(&ts, NULL); 504 nanosleep(&ts, NULL);
596 if (interval) 505 if (interval)
597 print_interval(); 506 process_interval();
598 } 507 }
599 } 508 }
600 509
@@ -602,18 +511,7 @@ static int __run_perf_stat(int argc, const char **argv)
602 511
603 update_stats(&walltime_nsecs_stats, t1 - t0); 512 update_stats(&walltime_nsecs_stats, t1 - t0);
604 513
605 if (aggr_mode == AGGR_GLOBAL) { 514 read_counters(true);
606 evlist__for_each(evsel_list, counter) {
607 read_counter_aggr(counter);
608 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
609 thread_map__nr(evsel_list->threads));
610 }
611 } else {
612 evlist__for_each(evsel_list, counter) {
613 read_counter(counter);
614 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
615 }
616 }
617 515
618 return WEXITSTATUS(status); 516 return WEXITSTATUS(status);
619} 517}
@@ -705,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
705 csv_output ? 0 : -4, 603 csv_output ? 0 : -4,
706 perf_evsel__cpus(evsel)->map[id], csv_sep); 604 perf_evsel__cpus(evsel)->map[id], csv_sep);
707 break; 605 break;
606 case AGGR_THREAD:
607 fprintf(output, "%*s-%*d%s",
608 csv_output ? 0 : 16,
609 thread_map__comm(evsel->threads, id),
610 csv_output ? 0 : -8,
611 thread_map__pid(evsel->threads, id),
612 csv_sep);
613 break;
708 case AGGR_GLOBAL: 614 case AGGR_GLOBAL:
709 default: 615 default:
710 break; 616 break;
@@ -805,9 +711,9 @@ static void print_aggr(char *prefix)
805 s2 = aggr_get_id(evsel_list->cpus, cpu2); 711 s2 = aggr_get_id(evsel_list->cpus, cpu2);
806 if (s2 != id) 712 if (s2 != id)
807 continue; 713 continue;
808 val += counter->counts->cpu[cpu].val; 714 val += perf_counts(counter->counts, cpu, 0)->val;
809 ena += counter->counts->cpu[cpu].ena; 715 ena += perf_counts(counter->counts, cpu, 0)->ena;
810 run += counter->counts->cpu[cpu].run; 716 run += perf_counts(counter->counts, cpu, 0)->run;
811 nr++; 717 nr++;
812 } 718 }
813 if (prefix) 719 if (prefix)
@@ -853,6 +759,40 @@ static void print_aggr(char *prefix)
853 } 759 }
854} 760}
855 761
762static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
763{
764 int nthreads = thread_map__nr(counter->threads);
765 int ncpus = cpu_map__nr(counter->cpus);
766 int cpu, thread;
767 double uval;
768
769 for (thread = 0; thread < nthreads; thread++) {
770 u64 ena = 0, run = 0, val = 0;
771
772 for (cpu = 0; cpu < ncpus; cpu++) {
773 val += perf_counts(counter->counts, cpu, thread)->val;
774 ena += perf_counts(counter->counts, cpu, thread)->ena;
775 run += perf_counts(counter->counts, cpu, thread)->run;
776 }
777
778 if (prefix)
779 fprintf(output, "%s", prefix);
780
781 uval = val * counter->scale;
782
783 if (nsec_counter(counter))
784 nsec_printout(thread, 0, counter, uval);
785 else
786 abs_printout(thread, 0, counter, uval);
787
788 if (!csv_output)
789 print_noise(counter, 1.0);
790
791 print_running(run, ena);
792 fputc('\n', output);
793 }
794}
795
856/* 796/*
857 * Print out the results of a single counter: 797 * Print out the results of a single counter:
858 * aggregated counts in system-wide mode 798 * aggregated counts in system-wide mode
@@ -915,9 +855,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
915 int cpu; 855 int cpu;
916 856
917 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 857 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
918 val = counter->counts->cpu[cpu].val; 858 val = perf_counts(counter->counts, cpu, 0)->val;
919 ena = counter->counts->cpu[cpu].ena; 859 ena = perf_counts(counter->counts, cpu, 0)->ena;
920 run = counter->counts->cpu[cpu].run; 860 run = perf_counts(counter->counts, cpu, 0)->run;
921 861
922 if (prefix) 862 if (prefix)
923 fprintf(output, "%s", prefix); 863 fprintf(output, "%s", prefix);
@@ -962,9 +902,38 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
962 } 902 }
963} 903}
964 904
965static void print_stat(int argc, const char **argv) 905static void print_interval(char *prefix, struct timespec *ts)
906{
907 static int num_print_interval;
908
909 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
910
911 if (num_print_interval == 0 && !csv_output) {
912 switch (aggr_mode) {
913 case AGGR_SOCKET:
914 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
915 break;
916 case AGGR_CORE:
917 fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit");
918 break;
919 case AGGR_NONE:
920 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
921 break;
922 case AGGR_THREAD:
923 fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit");
924 break;
925 case AGGR_GLOBAL:
926 default:
927 fprintf(output, "# time counts %*s events\n", unit_width, "unit");
928 }
929 }
930
931 if (++num_print_interval == 25)
932 num_print_interval = 0;
933}
934
935static void print_header(int argc, const char **argv)
966{ 936{
967 struct perf_evsel *counter;
968 int i; 937 int i;
969 938
970 fflush(stdout); 939 fflush(stdout);
@@ -990,36 +959,57 @@ static void print_stat(int argc, const char **argv)
990 fprintf(output, " (%d runs)", run_count); 959 fprintf(output, " (%d runs)", run_count);
991 fprintf(output, ":\n\n"); 960 fprintf(output, ":\n\n");
992 } 961 }
962}
963
964static void print_footer(void)
965{
966 if (!null_run)
967 fprintf(output, "\n");
968 fprintf(output, " %17.9f seconds time elapsed",
969 avg_stats(&walltime_nsecs_stats)/1e9);
970 if (run_count > 1) {
971 fprintf(output, " ");
972 print_noise_pct(stddev_stats(&walltime_nsecs_stats),
973 avg_stats(&walltime_nsecs_stats));
974 }
975 fprintf(output, "\n\n");
976}
977
978static void print_counters(struct timespec *ts, int argc, const char **argv)
979{
980 struct perf_evsel *counter;
981 char buf[64], *prefix = NULL;
982
983 if (interval)
984 print_interval(prefix = buf, ts);
985 else
986 print_header(argc, argv);
993 987
994 switch (aggr_mode) { 988 switch (aggr_mode) {
995 case AGGR_CORE: 989 case AGGR_CORE:
996 case AGGR_SOCKET: 990 case AGGR_SOCKET:
997 print_aggr(NULL); 991 print_aggr(prefix);
992 break;
993 case AGGR_THREAD:
994 evlist__for_each(evsel_list, counter)
995 print_aggr_thread(counter, prefix);
998 break; 996 break;
999 case AGGR_GLOBAL: 997 case AGGR_GLOBAL:
1000 evlist__for_each(evsel_list, counter) 998 evlist__for_each(evsel_list, counter)
1001 print_counter_aggr(counter, NULL); 999 print_counter_aggr(counter, prefix);
1002 break; 1000 break;
1003 case AGGR_NONE: 1001 case AGGR_NONE:
1004 evlist__for_each(evsel_list, counter) 1002 evlist__for_each(evsel_list, counter)
1005 print_counter(counter, NULL); 1003 print_counter(counter, prefix);
1006 break; 1004 break;
1007 default: 1005 default:
1008 break; 1006 break;
1009 } 1007 }
1010 1008
1011 if (!csv_output) { 1009 if (!interval && !csv_output)
1012 if (!null_run) 1010 print_footer();
1013 fprintf(output, "\n"); 1011
1014 fprintf(output, " %17.9f seconds time elapsed", 1012 fflush(output);
1015 avg_stats(&walltime_nsecs_stats)/1e9);
1016 if (run_count > 1) {
1017 fprintf(output, " ");
1018 print_noise_pct(stddev_stats(&walltime_nsecs_stats),
1019 avg_stats(&walltime_nsecs_stats));
1020 }
1021 fprintf(output, "\n\n");
1022 }
1023} 1013}
1024 1014
1025static volatile int signr = -1; 1015static volatile int signr = -1;
@@ -1091,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void)
1091 break; 1081 break;
1092 case AGGR_NONE: 1082 case AGGR_NONE:
1093 case AGGR_GLOBAL: 1083 case AGGR_GLOBAL:
1084 case AGGR_THREAD:
1094 default: 1085 default:
1095 break; 1086 break;
1096 } 1087 }
@@ -1315,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1315 "aggregate counts per processor socket", AGGR_SOCKET), 1306 "aggregate counts per processor socket", AGGR_SOCKET),
1316 OPT_SET_UINT(0, "per-core", &aggr_mode, 1307 OPT_SET_UINT(0, "per-core", &aggr_mode,
1317 "aggregate counts per physical processor core", AGGR_CORE), 1308 "aggregate counts per physical processor core", AGGR_CORE),
1309 OPT_SET_UINT(0, "per-thread", &aggr_mode,
1310 "aggregate counts per thread", AGGR_THREAD),
1318 OPT_UINTEGER('D', "delay", &initial_delay, 1311 OPT_UINTEGER('D', "delay", &initial_delay,
1319 "ms to wait before starting measurement after program start"), 1312 "ms to wait before starting measurement after program start"),
1320 OPT_END() 1313 OPT_END()
@@ -1406,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1406 run_count = 1; 1399 run_count = 1;
1407 } 1400 }
1408 1401
1409 /* no_aggr, cgroup are for system-wide only */ 1402 if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
1410 if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) && 1403 fprintf(stderr, "The --per-thread option is only available "
1404 "when monitoring via -p -t options.\n");
1405 parse_options_usage(NULL, options, "p", 1);
1406 parse_options_usage(NULL, options, "t", 1);
1407 goto out;
1408 }
1409
1410 /*
1411 * no_aggr, cgroup are for system-wide only
1412 * --per-thread is aggregated per thread, we dont mix it with cpu mode
1413 */
1414 if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
1411 !target__has_cpu(&target)) { 1415 !target__has_cpu(&target)) {
1412 fprintf(stderr, "both cgroup and no-aggregation " 1416 fprintf(stderr, "both cgroup and no-aggregation "
1413 "modes only available in system-wide mode\n"); 1417 "modes only available in system-wide mode\n");
@@ -1435,6 +1439,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1435 } 1439 }
1436 goto out; 1440 goto out;
1437 } 1441 }
1442
1443 /*
1444 * Initialize thread_map with comm names,
1445 * so we could print it out on output.
1446 */
1447 if (aggr_mode == AGGR_THREAD)
1448 thread_map__read_comms(evsel_list->threads);
1449
1438 if (interval && interval < 100) { 1450 if (interval && interval < 100) {
1439 pr_err("print interval must be >= 100ms\n"); 1451 pr_err("print interval must be >= 100ms\n");
1440 parse_options_usage(stat_usage, options, "I", 1); 1452 parse_options_usage(stat_usage, options, "I", 1);
@@ -1468,13 +1480,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1468 1480
1469 status = run_perf_stat(argc, argv); 1481 status = run_perf_stat(argc, argv);
1470 if (forever && status != -1) { 1482 if (forever && status != -1) {
1471 print_stat(argc, argv); 1483 print_counters(NULL, argc, argv);
1472 perf_stat__reset_stats(evsel_list); 1484 perf_stat__reset_stats();
1473 } 1485 }
1474 } 1486 }
1475 1487
1476 if (!forever && status != -1 && !interval) 1488 if (!forever && status != -1 && !interval)
1477 print_stat(argc, argv); 1489 print_counters(NULL, argc, argv);
1478 1490
1479 perf_evlist__free_stats(evsel_list); 1491 perf_evlist__free_stats(evsel_list);
1480out: 1492out:
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 2bf2ca771ca5..39ad4d0ca884 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1617,6 +1617,34 @@ static int trace__read_syscall_info(struct trace *trace, int id)
1617 return syscall__set_arg_fmts(sc); 1617 return syscall__set_arg_fmts(sc);
1618} 1618}
1619 1619
1620static int trace__validate_ev_qualifier(struct trace *trace)
1621{
1622 int err = 0;
1623 struct str_node *pos;
1624
1625 strlist__for_each(pos, trace->ev_qualifier) {
1626 const char *sc = pos->s;
1627
1628 if (audit_name_to_syscall(sc, trace->audit.machine) < 0) {
1629 if (err == 0) {
1630 fputs("Error:\tInvalid syscall ", trace->output);
1631 err = -EINVAL;
1632 } else {
1633 fputs(", ", trace->output);
1634 }
1635
1636 fputs(sc, trace->output);
1637 }
1638 }
1639
1640 if (err < 0) {
1641 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1642 "\nHint:\tand: 'man syscalls'\n", trace->output);
1643 }
1644
1645 return err;
1646}
1647
1620/* 1648/*
1621 * args is to be interpreted as a series of longs but we need to handle 1649 * args is to be interpreted as a series of longs but we need to handle
1622 * 8-byte unaligned accesses. args points to raw_data within the event 1650 * 8-byte unaligned accesses. args points to raw_data within the event
@@ -2862,6 +2890,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2862 err = -ENOMEM; 2890 err = -ENOMEM;
2863 goto out_close; 2891 goto out_close;
2864 } 2892 }
2893
2894 err = trace__validate_ev_qualifier(&trace);
2895 if (err)
2896 goto out_close;
2865 } 2897 }
2866 2898
2867 err = target__validate(&trace.opts.target); 2899 err = target__validate(&trace.opts.target);
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index ee41e705b2eb..d20d6e6ab65b 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -31,6 +31,7 @@ perf-y += code-reading.o
31perf-y += sample-parsing.o 31perf-y += sample-parsing.o
32perf-y += parse-no-sample-id-all.o 32perf-y += parse-no-sample-id-all.o
33perf-y += kmod-path.o 33perf-y += kmod-path.o
34perf-y += thread-map.o
34 35
35perf-$(CONFIG_X86) += perf-time-to-tsc.o 36perf-$(CONFIG_X86) += perf-time-to-tsc.o
36 37
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 87b9961646e4..c1dde733c3a6 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -171,6 +171,10 @@ static struct test {
171 .func = test__kmod_path__parse, 171 .func = test__kmod_path__parse,
172 }, 172 },
173 { 173 {
174 .desc = "Test thread map",
175 .func = test__thread_map,
176 },
177 {
174 .func = NULL, 178 .func = NULL,
175 }, 179 },
176}; 180};
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index b8d552b13950..a572f87e9c8d 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -78,7 +78,7 @@ int test__openat_syscall_event_on_all_cpus(void)
78 * we use the auto allocation it will allocate just for 1 cpu, 78 * we use the auto allocation it will allocate just for 1 cpu,
79 * as we start by cpu 0. 79 * as we start by cpu 0.
80 */ 80 */
81 if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) { 81 if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
82 pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr); 82 pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
83 goto out_close_fd; 83 goto out_close_fd;
84 } 84 }
@@ -98,9 +98,9 @@ int test__openat_syscall_event_on_all_cpus(void)
98 } 98 }
99 99
100 expected = nr_openat_calls + cpu; 100 expected = nr_openat_calls + cpu;
101 if (evsel->counts->cpu[cpu].val != expected) { 101 if (perf_counts(evsel->counts, cpu, 0)->val != expected) {
102 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", 102 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
103 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val); 103 expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val);
104 err = -1; 104 err = -1;
105 } 105 }
106 } 106 }
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index bdfa1f446681..c9a37bc6b33a 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -44,9 +44,9 @@ int test__openat_syscall_event(void)
44 goto out_close_fd; 44 goto out_close_fd;
45 } 45 }
46 46
47 if (evsel->counts->cpu[0].val != nr_openat_calls) { 47 if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) {
48 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", 48 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
49 nr_openat_calls, evsel->counts->cpu[0].val); 49 nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val);
50 goto out_close_fd; 50 goto out_close_fd;
51 } 51 }
52 52
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 8e5038b48ba8..ebb47d96bc0b 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -61,6 +61,7 @@ int test__switch_tracking(void);
61int test__fdarray__filter(void); 61int test__fdarray__filter(void);
62int test__fdarray__add(void); 62int test__fdarray__add(void);
63int test__kmod_path__parse(void); 63int test__kmod_path__parse(void);
64int test__thread_map(void);
64 65
65#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) 66#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
66#ifdef HAVE_DWARF_UNWIND_SUPPORT 67#ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
new file mode 100644
index 000000000000..5acf000939ea
--- /dev/null
+++ b/tools/perf/tests/thread-map.c
@@ -0,0 +1,38 @@
1#include <sys/types.h>
2#include <unistd.h>
3#include "tests.h"
4#include "thread_map.h"
5#include "debug.h"
6
7int test__thread_map(void)
8{
9 struct thread_map *map;
10
11 /* test map on current pid */
12 map = thread_map__new_by_pid(getpid());
13 TEST_ASSERT_VAL("failed to alloc map", map);
14
15 thread_map__read_comms(map);
16
17 TEST_ASSERT_VAL("wrong nr", map->nr == 1);
18 TEST_ASSERT_VAL("wrong pid",
19 thread_map__pid(map, 0) == getpid());
20 TEST_ASSERT_VAL("wrong comm",
21 thread_map__comm(map, 0) &&
22 !strcmp(thread_map__comm(map, 0), "perf"));
23 thread_map__put(map);
24
25 /* test dummy pid */
26 map = thread_map__new_dummy();
27 TEST_ASSERT_VAL("failed to alloc map", map);
28
29 thread_map__read_comms(map);
30
31 TEST_ASSERT_VAL("wrong nr", map->nr == 1);
32 TEST_ASSERT_VAL("wrong pid", thread_map__pid(map, 0) == -1);
33 TEST_ASSERT_VAL("wrong comm",
34 thread_map__comm(map, 0) &&
35 !strcmp(thread_map__comm(map, 0), "dummy"));
36 thread_map__put(map);
37 return 0;
38}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a8489b9d2812..037633c1da9d 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -289,5 +289,4 @@ void perf_evlist__to_front(struct perf_evlist *evlist,
289 289
290void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 290void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
291 struct perf_evsel *tracking_evsel); 291 struct perf_evsel *tracking_evsel);
292
293#endif /* __PERF_EVLIST_H */ 292#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1b2f480a3e82..2936b3080722 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -898,7 +898,7 @@ void perf_evsel__delete(struct perf_evsel *evsel)
898 free(evsel); 898 free(evsel);
899} 899}
900 900
901void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, 901void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
902 struct perf_counts_values *count) 902 struct perf_counts_values *count)
903{ 903{
904 struct perf_counts_values tmp; 904 struct perf_counts_values tmp;
@@ -910,8 +910,8 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu,
910 tmp = evsel->prev_raw_counts->aggr; 910 tmp = evsel->prev_raw_counts->aggr;
911 evsel->prev_raw_counts->aggr = *count; 911 evsel->prev_raw_counts->aggr = *count;
912 } else { 912 } else {
913 tmp = evsel->prev_raw_counts->cpu[cpu]; 913 tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
914 evsel->prev_raw_counts->cpu[cpu] = *count; 914 *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
915 } 915 }
916 916
917 count->val = count->val - tmp.val; 917 count->val = count->val - tmp.val;
@@ -939,20 +939,18 @@ void perf_counts_values__scale(struct perf_counts_values *count,
939 *pscaled = scaled; 939 *pscaled = scaled;
940} 940}
941 941
942int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, 942int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
943 perf_evsel__read_cb_t cb) 943 struct perf_counts_values *count)
944{ 944{
945 struct perf_counts_values count; 945 memset(count, 0, sizeof(*count));
946
947 memset(&count, 0, sizeof(count));
948 946
949 if (FD(evsel, cpu, thread) < 0) 947 if (FD(evsel, cpu, thread) < 0)
950 return -EINVAL; 948 return -EINVAL;
951 949
952 if (readn(FD(evsel, cpu, thread), &count, sizeof(count)) < 0) 950 if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) < 0)
953 return -errno; 951 return -errno;
954 952
955 return cb(evsel, cpu, thread, &count); 953 return 0;
956} 954}
957 955
958int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, 956int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
@@ -964,15 +962,15 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
964 if (FD(evsel, cpu, thread) < 0) 962 if (FD(evsel, cpu, thread) < 0)
965 return -EINVAL; 963 return -EINVAL;
966 964
967 if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) 965 if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
968 return -ENOMEM; 966 return -ENOMEM;
969 967
970 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) 968 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
971 return -errno; 969 return -errno;
972 970
973 perf_evsel__compute_deltas(evsel, cpu, &count); 971 perf_evsel__compute_deltas(evsel, cpu, thread, &count);
974 perf_counts_values__scale(&count, scale, NULL); 972 perf_counts_values__scale(&count, scale, NULL);
975 evsel->counts->cpu[cpu] = count; 973 *perf_counts(evsel->counts, cpu, thread) = count;
976 return 0; 974 return 0;
977} 975}
978 976
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4dbf32d94dfb..4a7ed5656cf0 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -9,23 +9,7 @@
9#include "xyarray.h" 9#include "xyarray.h"
10#include "symbol.h" 10#include "symbol.h"
11#include "cpumap.h" 11#include "cpumap.h"
12 12#include "stat.h"
13struct perf_counts_values {
14 union {
15 struct {
16 u64 val;
17 u64 ena;
18 u64 run;
19 };
20 u64 values[3];
21 };
22};
23
24struct perf_counts {
25 s8 scaled;
26 struct perf_counts_values aggr;
27 struct perf_counts_values cpu[];
28};
29 13
30struct perf_evsel; 14struct perf_evsel;
31 15
@@ -128,7 +112,7 @@ static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
128void perf_counts_values__scale(struct perf_counts_values *count, 112void perf_counts_values__scale(struct perf_counts_values *count,
129 bool scale, s8 *pscaled); 113 bool scale, s8 *pscaled);
130 114
131void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, 115void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
132 struct perf_counts_values *count); 116 struct perf_counts_values *count);
133 117
134int perf_evsel__object_config(size_t object_size, 118int perf_evsel__object_config(size_t object_size,
@@ -245,12 +229,8 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1,
245 (a)->attr.type == (b)->attr.type && \ 229 (a)->attr.type == (b)->attr.type && \
246 (a)->attr.config == (b)->attr.config) 230 (a)->attr.config == (b)->attr.config)
247 231
248typedef int (perf_evsel__read_cb_t)(struct perf_evsel *evsel, 232int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
249 int cpu, int thread, 233 struct perf_counts_values *count);
250 struct perf_counts_values *count);
251
252int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread,
253 perf_evsel__read_cb_t cb);
254 234
255int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, 235int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
256 int cpu, int thread, bool scale); 236 int cpu, int thread, bool scale);
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 5925fec90562..e23ded40c79e 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -20,3 +20,4 @@ util/stat.c
20util/strlist.c 20util/strlist.c
21util/trace-event.c 21util/trace-event.c
22../../lib/rbtree.c 22../../lib/rbtree.c
23util/string.c
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 4014b709f956..f2a0d1521e26 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -1,6 +1,8 @@
1#include <math.h> 1#include <math.h>
2#include "stat.h" 2#include "stat.h"
3#include "evlist.h"
3#include "evsel.h" 4#include "evsel.h"
5#include "thread_map.h"
4 6
5void update_stats(struct stats *stats, u64 val) 7void update_stats(struct stats *stats, u64 val)
6{ 8{
@@ -95,33 +97,46 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel)
95 } 97 }
96} 98}
97 99
98struct perf_counts *perf_counts__new(int ncpus) 100struct perf_counts *perf_counts__new(int ncpus, int nthreads)
99{ 101{
100 int size = sizeof(struct perf_counts) + 102 struct perf_counts *counts = zalloc(sizeof(*counts));
101 ncpus * sizeof(struct perf_counts_values);
102 103
103 return zalloc(size); 104 if (counts) {
105 struct xyarray *values;
106
107 values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values));
108 if (!values) {
109 free(counts);
110 return NULL;
111 }
112
113 counts->values = values;
114 }
115
116 return counts;
104} 117}
105 118
106void perf_counts__delete(struct perf_counts *counts) 119void perf_counts__delete(struct perf_counts *counts)
107{ 120{
108 free(counts); 121 if (counts) {
122 xyarray__delete(counts->values);
123 free(counts);
124 }
109} 125}
110 126
111static void perf_counts__reset(struct perf_counts *counts, int ncpus) 127static void perf_counts__reset(struct perf_counts *counts)
112{ 128{
113 memset(counts, 0, (sizeof(*counts) + 129 xyarray__reset(counts->values);
114 (ncpus * sizeof(struct perf_counts_values))));
115} 130}
116 131
117void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus) 132void perf_evsel__reset_counts(struct perf_evsel *evsel)
118{ 133{
119 perf_counts__reset(evsel->counts, ncpus); 134 perf_counts__reset(evsel->counts);
120} 135}
121 136
122int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) 137int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads)
123{ 138{
124 evsel->counts = perf_counts__new(ncpus); 139 evsel->counts = perf_counts__new(ncpus, nthreads);
125 return evsel->counts != NULL ? 0 : -ENOMEM; 140 return evsel->counts != NULL ? 0 : -ENOMEM;
126} 141}
127 142
@@ -130,3 +145,96 @@ void perf_evsel__free_counts(struct perf_evsel *evsel)
130 perf_counts__delete(evsel->counts); 145 perf_counts__delete(evsel->counts);
131 evsel->counts = NULL; 146 evsel->counts = NULL;
132} 147}
148
149void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
150{
151 int i;
152 struct perf_stat *ps = evsel->priv;
153
154 for (i = 0; i < 3; i++)
155 init_stats(&ps->res_stats[i]);
156
157 perf_stat_evsel_id_init(evsel);
158}
159
160int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
161{
162 evsel->priv = zalloc(sizeof(struct perf_stat));
163 if (evsel->priv == NULL)
164 return -ENOMEM;
165 perf_evsel__reset_stat_priv(evsel);
166 return 0;
167}
168
169void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
170{
171 zfree(&evsel->priv);
172}
173
174int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
175 int ncpus, int nthreads)
176{
177 struct perf_counts *counts;
178
179 counts = perf_counts__new(ncpus, nthreads);
180 if (counts)
181 evsel->prev_raw_counts = counts;
182
183 return counts ? 0 : -ENOMEM;
184}
185
186void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
187{
188 perf_counts__delete(evsel->prev_raw_counts);
189 evsel->prev_raw_counts = NULL;
190}
191
192int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
193{
194 int ncpus = perf_evsel__nr_cpus(evsel);
195 int nthreads = thread_map__nr(evsel->threads);
196
197 if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
198 perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
199 (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
200 return -ENOMEM;
201
202 return 0;
203}
204
205int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
206{
207 struct perf_evsel *evsel;
208
209 evlist__for_each(evlist, evsel) {
210 if (perf_evsel__alloc_stats(evsel, alloc_raw))
211 goto out_free;
212 }
213
214 return 0;
215
216out_free:
217 perf_evlist__free_stats(evlist);
218 return -1;
219}
220
221void perf_evlist__free_stats(struct perf_evlist *evlist)
222{
223 struct perf_evsel *evsel;
224
225 evlist__for_each(evlist, evsel) {
226 perf_evsel__free_stat_priv(evsel);
227 perf_evsel__free_counts(evsel);
228 perf_evsel__free_prev_raw_counts(evsel);
229 }
230}
231
232void perf_evlist__reset_stats(struct perf_evlist *evlist)
233{
234 struct perf_evsel *evsel;
235
236 evlist__for_each(evlist, evsel) {
237 perf_evsel__reset_stat_priv(evsel);
238 perf_evsel__reset_counts(evsel);
239 }
240}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 093dc3cb28dd..1cfbe0a980ac 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <stdio.h> 5#include <stdio.h>
6#include "xyarray.h"
6 7
7struct stats 8struct stats
8{ 9{
@@ -29,8 +30,32 @@ enum aggr_mode {
29 AGGR_GLOBAL, 30 AGGR_GLOBAL,
30 AGGR_SOCKET, 31 AGGR_SOCKET,
31 AGGR_CORE, 32 AGGR_CORE,
33 AGGR_THREAD,
32}; 34};
33 35
36struct perf_counts_values {
37 union {
38 struct {
39 u64 val;
40 u64 ena;
41 u64 run;
42 };
43 u64 values[3];
44 };
45};
46
47struct perf_counts {
48 s8 scaled;
49 struct perf_counts_values aggr;
50 struct xyarray *values;
51};
52
53static inline struct perf_counts_values*
54perf_counts(struct perf_counts *counts, int cpu, int thread)
55{
56 return xyarray__entry(counts->values, cpu, thread);
57}
58
34void update_stats(struct stats *stats, u64 val); 59void update_stats(struct stats *stats, u64 val);
35double avg_stats(struct stats *stats); 60double avg_stats(struct stats *stats);
36double stddev_stats(struct stats *stats); 61double stddev_stats(struct stats *stats);
@@ -46,6 +71,8 @@ static inline void init_stats(struct stats *stats)
46} 71}
47 72
48struct perf_evsel; 73struct perf_evsel;
74struct perf_evlist;
75
49bool __perf_evsel_stat__is(struct perf_evsel *evsel, 76bool __perf_evsel_stat__is(struct perf_evsel *evsel,
50 enum perf_stat_evsel_id id); 77 enum perf_stat_evsel_id id);
51 78
@@ -62,10 +89,24 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
62void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, 89void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
63 double avg, int cpu, enum aggr_mode aggr); 90 double avg, int cpu, enum aggr_mode aggr);
64 91
65struct perf_counts *perf_counts__new(int ncpus); 92struct perf_counts *perf_counts__new(int ncpus, int nthreads);
66void perf_counts__delete(struct perf_counts *counts); 93void perf_counts__delete(struct perf_counts *counts);
67 94
68void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus); 95void perf_evsel__reset_counts(struct perf_evsel *evsel);
69int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); 96int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads);
70void perf_evsel__free_counts(struct perf_evsel *evsel); 97void perf_evsel__free_counts(struct perf_evsel *evsel);
98
99void perf_evsel__reset_stat_priv(struct perf_evsel *evsel);
100int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel);
101void perf_evsel__free_stat_priv(struct perf_evsel *evsel);
102
103int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
104 int ncpus, int nthreads);
105void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel);
106
107int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
108
109int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
110void perf_evlist__free_stats(struct perf_evlist *evlist);
111void perf_evlist__reset_stats(struct perf_evlist *evlist);
71#endif 112#endif
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 504f2d73b7ee..48b588c6951a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1132,8 +1132,11 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1132 INIT_LIST_HEAD(&md.maps); 1132 INIT_LIST_HEAD(&md.maps);
1133 1133
1134 fd = open(kcore_filename, O_RDONLY); 1134 fd = open(kcore_filename, O_RDONLY);
1135 if (fd < 0) 1135 if (fd < 0) {
1136 pr_err("%s requires CAP_SYS_RAWIO capability to access.\n",
1137 kcore_filename);
1136 return -EINVAL; 1138 return -EINVAL;
1139 }
1137 1140
1138 /* Read new maps into temporary lists */ 1141 /* Read new maps into temporary lists */
1139 err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md, 1142 err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md,
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 368cc58c6892..da7646d767fe 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -8,9 +8,11 @@
8#include <unistd.h> 8#include <unistd.h>
9#include "strlist.h" 9#include "strlist.h"
10#include <string.h> 10#include <string.h>
11#include <api/fs/fs.h>
11#include "asm/bug.h" 12#include "asm/bug.h"
12#include "thread_map.h" 13#include "thread_map.h"
13#include "util.h" 14#include "util.h"
15#include "debug.h"
14 16
15/* Skip "." and ".." directories */ 17/* Skip "." and ".." directories */
16static int filter(const struct dirent *dir) 18static int filter(const struct dirent *dir)
@@ -21,11 +23,26 @@ static int filter(const struct dirent *dir)
21 return 1; 23 return 1;
22} 24}
23 25
26static void thread_map__reset(struct thread_map *map, int start, int nr)
27{
28 size_t size = (nr - start) * sizeof(map->map[0]);
29
30 memset(&map->map[start], 0, size);
31}
32
24static struct thread_map *thread_map__realloc(struct thread_map *map, int nr) 33static struct thread_map *thread_map__realloc(struct thread_map *map, int nr)
25{ 34{
26 size_t size = sizeof(*map) + sizeof(map->map[0]) * nr; 35 size_t size = sizeof(*map) + sizeof(map->map[0]) * nr;
36 int start = map ? map->nr : 0;
27 37
28 return realloc(map, size); 38 map = realloc(map, size);
39 /*
40 * We only realloc to add more items, let's reset new items.
41 */
42 if (map)
43 thread_map__reset(map, start, nr);
44
45 return map;
29} 46}
30 47
31#define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr) 48#define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr)
@@ -304,8 +321,12 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid,
304static void thread_map__delete(struct thread_map *threads) 321static void thread_map__delete(struct thread_map *threads)
305{ 322{
306 if (threads) { 323 if (threads) {
324 int i;
325
307 WARN_ONCE(atomic_read(&threads->refcnt) != 0, 326 WARN_ONCE(atomic_read(&threads->refcnt) != 0,
308 "thread map refcnt unbalanced\n"); 327 "thread map refcnt unbalanced\n");
328 for (i = 0; i < threads->nr; i++)
329 free(thread_map__comm(threads, i));
309 free(threads); 330 free(threads);
310 } 331 }
311} 332}
@@ -333,3 +354,56 @@ size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
333 354
334 return printed + fprintf(fp, "\n"); 355 return printed + fprintf(fp, "\n");
335} 356}
357
358static int get_comm(char **comm, pid_t pid)
359{
360 char *path;
361 size_t size;
362 int err;
363
364 if (asprintf(&path, "%s/%d/comm", procfs__mountpoint(), pid) == -1)
365 return -ENOMEM;
366
367 err = filename__read_str(path, comm, &size);
368 if (!err) {
369 /*
370 * We're reading 16 bytes, while filename__read_str
371 * allocates data per BUFSIZ bytes, so we can safely
372 * mark the end of the string.
373 */
374 (*comm)[size] = 0;
375 rtrim(*comm);
376 }
377
378 free(path);
379 return err;
380}
381
382static void comm_init(struct thread_map *map, int i)
383{
384 pid_t pid = thread_map__pid(map, i);
385 char *comm = NULL;
386
387 /* dummy pid comm initialization */
388 if (pid == -1) {
389 map->map[i].comm = strdup("dummy");
390 return;
391 }
392
393 /*
394 * The comm name is like extra bonus ;-),
395 * so just warn if we fail for any reason.
396 */
397 if (get_comm(&comm, pid))
398 pr_warning("Couldn't resolve comm name for pid %d\n", pid);
399
400 map->map[i].comm = comm;
401}
402
403void thread_map__read_comms(struct thread_map *threads)
404{
405 int i;
406
407 for (i = 0; i < threads->nr; ++i)
408 comm_init(threads, i);
409}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 6b0cd2dc006b..af679d8a50f8 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -7,6 +7,7 @@
7 7
8struct thread_map_data { 8struct thread_map_data {
9 pid_t pid; 9 pid_t pid;
10 char *comm;
10}; 11};
11 12
12struct thread_map { 13struct thread_map {
@@ -44,4 +45,11 @@ thread_map__set_pid(struct thread_map *map, int thread, pid_t pid)
44{ 45{
45 map->map[thread].pid = pid; 46 map->map[thread].pid = pid;
46} 47}
48
49static inline char *thread_map__comm(struct thread_map *map, int thread)
50{
51 return map->map[thread].comm;
52}
53
54void thread_map__read_comms(struct thread_map *threads);
47#endif /* __PERF_THREAD_MAP_H */ 55#endif /* __PERF_THREAD_MAP_H */