author		Ingo Molnar <mingo@kernel.org>	2018-03-09 02:27:55 -0500
committer	Ingo Molnar <mingo@kernel.org>	2018-03-09 02:27:55 -0500
commit		fbf8a1e12c3ba3afdf0804bc80f5f13dfec1cffe (patch)
tree		6b0dd23c7646cd4ec13b0636cdda11188d6845a3
parent		1af22eba248efe2de25658041a80a3d40fb3e92e (diff)
parent		2427b432e63b4b911100f717c48289195b7a7d62 (diff)
Merge tag 'perf-core-for-mingo-4.17-20180308' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Support displaying the IPC/Cycle in 'annotate' TUI, for systems where this
  info can be obtained, like Intel's >= Skylake (Jin Yao)

- Support wildcards on PMU name in dynamic PMU events (Agustin Vega-Frias)

- Display pmu name when printing unmerged events in stat (Agustin Vega-Frias)

- Auto-merge PMU events created by prefix or glob match (Agustin Vega-Frias)

- Fix s390 'call' operations target function annotation (Thomas Richter)

- Handle s390 PC relative load and store instructions in the augmented
  'annotate' code, used so far in the TUI modes of 'perf report' and
  'perf annotate' (Thomas Richter)

- Provide libtraceevent with a kernel symbol resolver, so that symbols in
  tracepoint fields can be resolved when showing them in tools such as
  'perf report' (Wang YanQing)

- Refactor the cgroups code to look more like other code in tools/perf,
  using cgroup__{put,get} for refcount operations instead of its open-coded
  equivalent, breaking larger functions, etc (Arnaldo Carvalho de Melo)

- Implement support for the -G/--cgroup target in 'perf trace', allowing
  strace-like tracing (plus other events, backtraces, etc) for cgroups
  (Arnaldo Carvalho de Melo)

- Update thread shortname in 'perf sched map' when the thread's COMM changes
  (Changbin Du)

- Refcount 'struct mem_info' so it can be shared by several users, avoiding
  duplicated structs and fixing use-after-free crashes (Jiri Olsa)

- Display perf.data version and offsets in 'perf report --header' (Jiri Olsa)

- Record the machine's memory topology information in a perf.data feature
  section, to be used by tools such as 'perf c2c' (Jiri Olsa)

- Fix output of forced groups in the header for 'perf report' --stdio and
  --tui (Jiri Olsa)

- Better support llvm, clang and cxx make tests in the build process
  (Jiri Olsa)

- Streamline the 'struct perf_mmap' methods, storing some info in the struct
  instead of passing it via various methods, shortening their signatures
  (Kan Liang)

- Update the quipper perf.data parser library site information
  (Stephane Eranian)

- Correct perf's man page title markers for asciidoctor (Takashi Iwai)

- Intel PT fixes and refactorings paving the way for implementing support
  for AUX area sampling (Adrian Hunter)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  tools/build/Makefile.feature | 6
-rw-r--r--  tools/build/feature/Makefile | 14
-rw-r--r--  tools/include/linux/bitmap.h | 2
-rw-r--r--  tools/perf/Documentation/perf-data.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-ftrace.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-kallsyms.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-list.txt | 8
-rw-r--r--  tools/perf/Documentation/perf-sched.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-script-perl.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-stat.txt | 17
-rw-r--r--  tools/perf/Documentation/perf-trace.txt | 25
-rw-r--r--  tools/perf/Documentation/perf.data-file-format.txt | 7
-rw-r--r--  tools/perf/Makefile.perf | 6
-rw-r--r--  tools/perf/arch/s390/annotate/instructions.c | 116
-rw-r--r--  tools/perf/arch/x86/tests/perf-time-to-tsc.c | 7
-rw-r--r--  tools/perf/arch/x86/util/auxtrace.c | 14
-rw-r--r--  tools/perf/builtin-annotate.c | 88
-rw-r--r--  tools/perf/builtin-c2c.c | 24
-rw-r--r--  tools/perf/builtin-kvm.c | 9
-rw-r--r--  tools/perf/builtin-record.c | 45
-rw-r--r--  tools/perf/builtin-report.c | 26
-rw-r--r--  tools/perf/builtin-sched.c | 133
-rw-r--r--  tools/perf/builtin-stat.c | 29
-rw-r--r--  tools/perf/builtin-top.c | 7
-rw-r--r--  tools/perf/builtin-trace.c | 57
-rw-r--r--  tools/perf/tests/backward-ring-buffer.c | 5
-rw-r--r--  tools/perf/tests/bpf.c | 5
-rw-r--r--  tools/perf/tests/code-reading.c | 7
-rw-r--r--  tools/perf/tests/keep-tracking.c | 7
-rw-r--r--  tools/perf/tests/mmap-basic.c | 7
-rw-r--r--  tools/perf/tests/openat-syscall-tp-fields.c | 7
-rw-r--r--  tools/perf/tests/perf-record.c | 7
-rw-r--r--  tools/perf/tests/sw-clock.c | 7
-rw-r--r--  tools/perf/tests/switch-tracking.c | 7
-rw-r--r--  tools/perf/tests/task-exit.c | 7
-rw-r--r--  tools/perf/ui/browsers/hists.c | 5
-rw-r--r--  tools/perf/util/annotate.c | 2
-rw-r--r--  tools/perf/util/auxtrace.c | 37
-rw-r--r--  tools/perf/util/auxtrace.h | 2
-rw-r--r--  tools/perf/util/cgroup.c | 111
-rw-r--r--  tools/perf/util/cgroup.h | 13
-rw-r--r--  tools/perf/util/env.h | 9
-rw-r--r--  tools/perf/util/evlist.c | 8
-rw-r--r--  tools/perf/util/evsel.c | 23
-rw-r--r--  tools/perf/util/evsel.h | 6
-rw-r--r--  tools/perf/util/header.c | 312
-rw-r--r--  tools/perf/util/header.h | 1
-rw-r--r--  tools/perf/util/hist.c | 4
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 64
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 2
-rw-r--r--  tools/perf/util/intel-pt.c | 110
-rw-r--r--  tools/perf/util/machine.c | 2
-rw-r--r--  tools/perf/util/mmap.c | 63
-rw-r--r--  tools/perf/util/mmap.h | 16
-rw-r--r--  tools/perf/util/parse-events.c | 21
-rw-r--r--  tools/perf/util/parse-events.h | 2
-rw-r--r--  tools/perf/util/parse-events.l | 2
-rw-r--r--  tools/perf/util/parse-events.y | 18
-rw-r--r--  tools/perf/util/python.c | 7
-rw-r--r--  tools/perf/util/symbol.c | 22
-rw-r--r--  tools/perf/util/symbol.h | 19
-rw-r--r--  tools/perf/util/thread.h | 1
62 files changed, 1197 insertions, 401 deletions
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index c378f003b007..5b6dda3b1ca8 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -82,7 +82,11 @@ FEATURE_TESTS_EXTRA := \
 	liberty-z \
 	libunwind-debug-frame \
 	libunwind-debug-frame-arm \
-	libunwind-debug-frame-aarch64
+	libunwind-debug-frame-aarch64 \
+	cxx \
+	llvm \
+	llvm-version \
+	clang

 FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)

diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 0a490cb15149..dac9563b5470 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -54,7 +54,10 @@ FILES= \
 	test-jvmti.bin \
 	test-sched_getcpu.bin \
 	test-setns.bin \
-	test-libopencsd.bin
+	test-libopencsd.bin \
+	test-clang.bin \
+	test-llvm.bin \
+	test-llvm-version.bin

 FILES := $(addprefix $(OUTPUT),$(FILES))

@@ -257,11 +260,13 @@ $(OUTPUT)test-llvm.bin:
 		-I$(shell $(LLVM_CONFIG) --includedir) \
 		-L$(shell $(LLVM_CONFIG) --libdir) \
 		$(shell $(LLVM_CONFIG) --libs Core BPF) \
-		$(shell $(LLVM_CONFIG) --system-libs)
+		$(shell $(LLVM_CONFIG) --system-libs) \
+		> $(@:.bin=.make.output) 2>&1

 $(OUTPUT)test-llvm-version.bin:
 	$(BUILDXX) -std=gnu++11 \
-		-I$(shell $(LLVM_CONFIG) --includedir)
+		-I$(shell $(LLVM_CONFIG) --includedir) \
+		> $(@:.bin=.make.output) 2>&1

 $(OUTPUT)test-clang.bin:
 	$(BUILDXX) -std=gnu++11 \
@@ -271,7 +276,8 @@ $(OUTPUT)test-clang.bin:
 		-lclangFrontend -lclangEdit -lclangLex \
 		-lclangAST -Wl,--end-group \
 		$(shell $(LLVM_CONFIG) --libs Core option) \
-		$(shell $(LLVM_CONFIG) --system-libs)
+		$(shell $(LLVM_CONFIG) --system-libs) \
+		> $(@:.bin=.make.output) 2>&1

 -include $(OUTPUT)*.d

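A note on the idiom these three hunks share: $(@:.bin=.make.output) is a GNU
Make substitution reference on the automatic variable $@, so for the target
$(OUTPUT)test-llvm.bin the command's stdout and stderr are captured in
$(OUTPUT)test-llvm.make.output, matching what the other feature tests in this
Makefile already do, so the build system has a log to show when a feature
test fails.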
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index ca160270fdfa..63440cc8d618 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -98,7 +98,7 @@ static inline int test_and_set_bit(int nr, unsigned long *addr)

 /**
  * bitmap_alloc - Allocate bitmap
- * @nr: Bit to set
+ * @nbits: Number of bits
  */
 static inline unsigned long *bitmap_alloc(int nbits)
 {
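The kernel-doc fix above is small but worth pausing on: bitmap_alloc() takes
a bit count, not a bit index, and the tools-side helper is essentially a
zeroing allocation sized by that count. A self-contained sketch of the same
idea, using plain libc rather than the perf tree (the helper here mirrors,
but is not, the patched header):

	#include <limits.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))
	#define BITS_TO_LONGS(nbits) (((nbits) + BITS_PER_LONG - 1) / BITS_PER_LONG)

	/* Same contract as the header above: allocate a zeroed bitmap big
	 * enough to hold nbits bits. */
	static unsigned long *bitmap_alloc(int nbits)
	{
		return calloc(BITS_TO_LONGS(nbits), sizeof(unsigned long));
	}

	int main(void)
	{
		unsigned long *bm = bitmap_alloc(300); /* 300 bits -> 5 longs on LP64 */

		if (!bm)
			return 1;
		bm[0] |= 1UL << 7;                     /* set bit 7 by hand */
		printf("bit 7 set: %d\n", !!(bm[0] & (1UL << 7)));
		free(bm);
		return 0;
	}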
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index 90bb4aabe4f8..c87180764829 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -1,5 +1,5 @@
 perf-data(1)
-==============
+============

 NAME
 ----
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 721a447f046e..b80c84307dc9 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -1,5 +1,5 @@
 perf-ftrace(1)
-=============
+==============

 NAME
 ----
diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt
index cf9f4040ea5c..f3c620951f6e 100644
--- a/tools/perf/Documentation/perf-kallsyms.txt
+++ b/tools/perf/Documentation/perf-kallsyms.txt
@@ -1,5 +1,5 @@
 perf-kallsyms(1)
-==============
+================

 NAME
 ----
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index e2a897ae3596..2549c34a7895 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -141,7 +141,13 @@ on the first memory controller on socket 0 of a Intel Xeon system

 Each memory controller has its own PMU. Measuring the complete system
 bandwidth would require specifying all imc PMUs (see perf list output),
-and adding the values together.
+and adding the values together. To simplify creation of multiple events,
+prefix and glob matching is supported in the PMU name, and the prefix
+'uncore_' is also ignored when performing the match. So the command above
+can be expanded to all memory controllers by using the syntaxes:
+
+  perf stat -C 0 -a imc/cas_count_read/,imc/cas_count_write/ -I 1000 ...
+  perf stat -C 0 -a *imc*/cas_count_read/,*imc*/cas_count_write/ -I 1000 ...

 This example measures the combined core power every second

diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index c7e50f263887..bb33601a823b 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -1,5 +1,5 @@
 perf-sched(1)
-==============
+=============

 NAME
 ----
diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
index 142606c0ec9c..5a1f68122f50 100644
--- a/tools/perf/Documentation/perf-script-perl.txt
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -1,5 +1,5 @@
 perf-script-perl(1)
-==================
+===================

 NAME
 ----
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2b38e222016a..f15b306be183 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -49,6 +49,13 @@ report::
 	parameters are defined by corresponding entries in
 	/sys/bus/event_source/devices/<pmu>/format/*

+	Note that the last two syntaxes support prefix and glob matching in
+	the PMU name to simplify creation of events accross multiple instances
+	of the same type of PMU in large systems (e.g. memory controller PMUs).
+	Multiple PMU instances are typical for uncore PMUs, so the prefix
+	'uncore_' is also ignored when performing this match.
+
+
 -i::
 --no-inherit::
 	child tasks do not inherit counters
@@ -260,6 +267,16 @@ taskset.
 --no-merge::
 Do not merge results from same PMUs.

+When multiple events are created from a single event specification,
+stat will, by default, aggregate the event counts and show the result
+in a single row. This option disables that behavior and shows
+the individual events and counts.
+
+Multiple events are created from a single event specification when:
+1. Prefix or glob matching is used for the PMU name.
+2. Aliases, which are listed immediately after the Kernel PMU events
+   by perf list, are used.
+
 --smi-cost::
 Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.

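Taken together with the perf-list.txt hunk earlier: by default the
per-instance counts from a prefix or glob PMU spec are auto-merged into a
single row, so something like

  perf stat -a -e imc/cas_count_read/ -I 1000

reports one summed imc row, while adding --no-merge to the same command line
would instead emit one row per matched PMU instance (command lines are
illustrative; the exact set of instances depends on the machine's uncore
PMUs).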
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 33a88e984e66..5a7035c5c523 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -63,6 +63,31 @@ filter out the startup phase of the program, which is often very different.
 --uid=::
 	Record events in threads owned by uid. Name or number.

+-G::
+--cgroup::
+	Record events in threads in a cgroup.
+
+	Look for cgroups to set at the /sys/fs/cgroup/perf_event directory, then
+	remove the /sys/fs/cgroup/perf_event/ part and try:
+
+		perf trace -G A -e sched:*switch
+
+	Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
+	_and_ sched:sched_switch to the 'A' cgroup, while:
+
+		perf trace -e sched:*switch -G A
+
+	will only set the sched:sched_switch event to the 'A' cgroup, all the
+	other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
+	a cgroup (on the root cgroup, sys wide, etc).
+
+	Multiple cgroups:
+
+		perf trace -G A -e sched:*switch -G B
+
+	the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
+	to the 'B' cgroup.
+
 --filter-pids=::
 	Filter out events for these pids and for 'trace' itself (comma separated list).

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index f7d85e89a98a..d00f0d51cab8 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -485,10 +485,5 @@ in pmu-tools parser. This allows to read perf.data from python and dump it.
 quipper

 The quipper C++ parser is available at
-https://chromium.googlesource.com/chromiumos/platform2
+http://github.com/google/perf_data_converter/tree/master/src/quipper

-It is under the chromiumos-wide-profiling/ subdirectory. This library can
-convert a perf data file to a protobuf and vice versa.
-
-Unfortunately this parser tends to be many versions behind and may not be able
-to parse data files generated by recent perf.
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 4679e237a7f5..f7517e1b73f8 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -708,15 +708,15 @@ TAG_FILES= ../../include/uapi/linux/perf_event.h

 TAGS:
 	$(QUIET_GEN)$(RM) TAGS; \
-	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs etags -a $(TAG_FILES)

 tags:
 	$(QUIET_GEN)$(RM) tags; \
-	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs ctags -a $(TAG_FILES)

 cscope:
 	$(QUIET_GEN)$(RM) cscope*; \
-	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs cscope -b $(TAG_FILES)

 ### Testing rules

diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index 01df9d8303e1..46c21831f2ac 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -1,6 +1,112 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/compiler.h>

+static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
+			    struct map *map)
+{
+	char *endptr, *tok, *name;
+	struct addr_map_symbol target = {
+		.map = map,
+	};
+
+	tok = strchr(ops->raw, ',');
+	if (!tok)
+		return -1;
+
+	ops->target.addr = strtoull(tok + 1, &endptr, 16);
+
+	name = strchr(endptr, '<');
+	if (name == NULL)
+		return -1;
+
+	name++;
+
+	if (arch->objdump.skip_functions_char &&
+	    strchr(name, arch->objdump.skip_functions_char))
+		return -1;
+
+	tok = strchr(name, '>');
+	if (tok == NULL)
+		return -1;
+
+	*tok = '\0';
+	ops->target.name = strdup(name);
+	*tok = '>';
+
+	if (ops->target.name == NULL)
+		return -1;
+	target.addr = map__objdump_2mem(map, ops->target.addr);
+
+	if (map_groups__find_ams(&target) == 0 &&
+	    map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr)
+		ops->target.sym = target.sym;
+
+	return 0;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops);
+
+static struct ins_ops s390_call_ops = {
+	.parse = s390_call__parse,
+	.scnprintf = call__scnprintf,
+};
+
+static int s390_mov__parse(struct arch *arch __maybe_unused,
+			   struct ins_operands *ops,
+			   struct map *map __maybe_unused)
+{
+	char *s = strchr(ops->raw, ','), *target, *endptr;
+
+	if (s == NULL)
+		return -1;
+
+	*s = '\0';
+	ops->source.raw = strdup(ops->raw);
+	*s = ',';
+
+	if (ops->source.raw == NULL)
+		return -1;
+
+	target = ++s;
+	ops->target.raw = strdup(target);
+	if (ops->target.raw == NULL)
+		goto out_free_source;
+
+	ops->target.addr = strtoull(target, &endptr, 16);
+	if (endptr == target)
+		goto out_free_target;
+
+	s = strchr(endptr, '<');
+	if (s == NULL)
+		goto out_free_target;
+	endptr = strchr(s + 1, '>');
+	if (endptr == NULL)
+		goto out_free_target;
+
+	*endptr = '\0';
+	ops->target.name = strdup(s + 1);
+	*endptr = '>';
+	if (ops->target.name == NULL)
+		goto out_free_target;
+
+	return 0;
+
+out_free_target:
+	zfree(&ops->target.raw);
+out_free_source:
+	zfree(&ops->source.raw);
+	return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+			  struct ins_operands *ops);
+
+static struct ins_ops s390_mov_ops = {
+	.parse = s390_mov__parse,
+	.scnprintf = mov__scnprintf,
+};
+
 static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
 {
 	struct ins_ops *ops = NULL;
@@ -14,9 +120,17 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na
 	if (!strcmp(name, "bras") ||
 	    !strcmp(name, "brasl") ||
 	    !strcmp(name, "basr"))
-		ops = &call_ops;
+		ops = &s390_call_ops;
 	if (!strcmp(name, "br"))
 		ops = &ret_ops;
+	/* override load/store relative to PC */
+	if (!strcmp(name, "lrl") ||
+	    !strcmp(name, "lgrl") ||
+	    !strcmp(name, "lgfrl") ||
+	    !strcmp(name, "llgfrl") ||
+	    !strcmp(name, "strl") ||
+	    !strcmp(name, "stgrl"))
+		ops = &s390_mov_ops;

 	if (ops)
 		arch__associate_ins_ops(arch, name, ops);
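s390_call__parse() above works on the raw operand text that objdump emits for
a call, something like '%r14,11e0 <strtoull>' for a brasl: skip to the comma,
parse the hex target address, then lift the symbol name out of the angle
brackets. A standalone sketch of that extraction (the sample operand string
is an assumption for illustration, not output from a real perf run):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	int main(void)
	{
		char raw[] = "%r14,11e0 <strtoull>"; /* assumed objdump operand text */
		char *endptr, *name, *close;
		unsigned long long addr;
		char *tok = strchr(raw, ',');        /* operands are comma separated */

		if (!tok)
			return 1;
		addr = strtoull(tok + 1, &endptr, 16); /* hex target address */
		name = strchr(endptr, '<');            /* symbol follows in <...> */
		close = name ? strchr(++name, '>') : NULL;
		if (!close)
			return 1;
		*close = '\0';   /* same in-place NUL trick as the parser above */
		printf("target addr: %#llx, symbol: %s\n", addr, name);
		return 0;
	}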
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 7f82d91ef473..7a7721604b86 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -61,7 +61,6 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 	u64 test_tsc, comm1_tsc, comm2_tsc;
 	u64 test_time, comm1_time = 0, comm2_time = 0;
 	struct perf_mmap *md;
-	u64 end, start;

 	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	CHECK_NOT_NULL__(threads);
@@ -112,10 +111,10 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe

 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;

-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			struct perf_sample sample;

 			if (event->header.type != PERF_RECORD_COMM ||
@@ -134,7 +133,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 			comm2_time = sample.time;
 		}
 next_event:
-		perf_mmap__consume(md, false);
+		perf_mmap__consume(md);
 	}
 	perf_mmap__read_done(md);
 }
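This test shows the shape of the streamlined API: perf_mmap__read_init(),
perf_mmap__read_event(), perf_mmap__consume() and perf_mmap__read_done() now
take only the map, because the overwrite flag and the start/end read
positions moved into 'struct perf_mmap' itself. A sketch of the resulting
canonical consumer loop (fragment only; it assumes a mapped ring 'md' from an
evlist and a hypothetical handler, and cannot run outside the perf tree):

	union perf_event *event;

	if (perf_mmap__read_init(md) < 0)      /* ring empty or unreadable */
		return;

	while ((event = perf_mmap__read_event(md)) != NULL) {
		handle(event);                 /* hypothetical per-event handler */
		perf_mmap__consume(md);        /* mark this event as done */
	}
	perf_mmap__read_done(md);              /* close out this read round */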
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 6aa3f2a38321..b135af62011c 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -37,15 +37,11 @@ struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
 	intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
 	intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);

-	if (evlist) {
-		evlist__for_each_entry(evlist, evsel) {
-			if (intel_pt_pmu &&
-			    evsel->attr.type == intel_pt_pmu->type)
-				found_pt = true;
-			if (intel_bts_pmu &&
-			    evsel->attr.type == intel_bts_pmu->type)
-				found_bts = true;
-		}
+	evlist__for_each_entry(evlist, evsel) {
+		if (intel_pt_pmu && evsel->attr.type == intel_pt_pmu->type)
+			found_pt = true;
+		if (intel_bts_pmu && evsel->attr.type == intel_bts_pmu->type)
+			found_bts = true;
 	}

 	if (found_pt && found_bts) {
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index f15731a3d438..ead6ae4549e5 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -44,6 +44,7 @@ struct perf_annotate {
 	bool full_paths;
 	bool print_line;
 	bool skip_missing;
+	bool has_br_stack;
 	const char *sym_hist_filter;
 	const char *cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -146,16 +147,73 @@ static void process_branch_stack(struct branch_stack *bs, struct addr_location *
 	free(bi);
 }

+static int hist_iter__branch_callback(struct hist_entry_iter *iter,
+				      struct addr_location *al __maybe_unused,
+				      bool single __maybe_unused,
+				      void *arg __maybe_unused)
+{
+	struct hist_entry *he = iter->he;
+	struct branch_info *bi;
+	struct perf_sample *sample = iter->sample;
+	struct perf_evsel *evsel = iter->evsel;
+	int err;
+
+	hist__account_cycles(sample->branch_stack, al, sample, false);
+
+	bi = he->branch_info;
+	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
+
+	if (err)
+		goto out;
+
+	err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
+
+out:
+	return err;
+}
+
+static int process_branch_callback(struct perf_evsel *evsel,
+				   struct perf_sample *sample,
+				   struct addr_location *al __maybe_unused,
+				   struct perf_annotate *ann,
+				   struct machine *machine)
+{
+	struct hist_entry_iter iter = {
+		.evsel = evsel,
+		.sample = sample,
+		.add_entry_cb = hist_iter__branch_callback,
+		.hide_unresolved = symbol_conf.hide_unresolved,
+		.ops = &hist_iter_branch,
+	};
+
+	struct addr_location a;
+	int ret;
+
+	if (machine__resolve(machine, &a, sample) < 0)
+		return -1;
+
+	if (a.sym == NULL)
+		return 0;
+
+	if (a.map != NULL)
+		a.map->dso->hit = 1;
+
+	ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
+	return ret;
+}
+
 static int perf_evsel__add_sample(struct perf_evsel *evsel,
 				  struct perf_sample *sample,
 				  struct addr_location *al,
-				  struct perf_annotate *ann)
+				  struct perf_annotate *ann,
+				  struct machine *machine)
 {
 	struct hists *hists = evsel__hists(evsel);
 	struct hist_entry *he;
 	int ret;

-	if (ann->sym_hist_filter != NULL &&
+	if ((!ann->has_br_stack || !ui__has_annotation()) &&
+	    ann->sym_hist_filter != NULL &&
 	    (al->sym == NULL ||
 	     strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
 		/* We're only interested in a symbol named sym_hist_filter */
@@ -178,6 +236,9 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 	 */
 	process_branch_stack(sample->branch_stack, al, sample);

+	if (ann->has_br_stack && ui__has_annotation())
+		return process_branch_callback(evsel, sample, al, ann, machine);
+
 	he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
 	if (he == NULL)
 		return -ENOMEM;
@@ -206,7 +267,8 @@ static int process_sample_event(struct perf_tool *tool,
 	if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
 		goto out_put;

-	if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
+	if (!al.filtered &&
+	    perf_evsel__add_sample(evsel, sample, &al, ann, machine)) {
 		pr_warning("problem incrementing symbol count, "
 			   "skipping event\n");
 		ret = -1;
@@ -238,6 +300,10 @@ static void hists__find_annotations(struct hists *hists,
 		if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
 			goto find_next;

+		if (ann->sym_hist_filter &&
+		    (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0))
+			goto find_next;
+
 		notes = symbol__annotation(he->ms.sym);
 		if (notes->src == NULL) {
 find_next:
@@ -269,6 +335,7 @@ find_next:
 			nd = rb_next(nd);
 		} else if (use_browser == 1) {
 			key = hist_entry__tui_annotate(he, evsel, NULL);
+
 			switch (key) {
 			case -1:
 				if (!ann->skip_missing)
@@ -489,6 +556,9 @@ int cmd_annotate(int argc, const char **argv)
 	if (annotate.session == NULL)
 		return -1;

+	annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
+						      HEADER_BRANCH_STACK);
+
 	ret = symbol__annotation_init();
 	if (ret < 0)
 		goto out_delete;
@@ -499,9 +569,6 @@ int cmd_annotate(int argc, const char **argv)
 	if (ret < 0)
 		goto out_delete;

-	if (setup_sorting(NULL) < 0)
-		usage_with_options(annotate_usage, options);
-
 	if (annotate.use_stdio)
 		use_browser = 0;
 	else if (annotate.use_tui)
@@ -511,6 +578,15 @@ int cmd_annotate(int argc, const char **argv)

 	setup_browser(true);

+	if (use_browser == 1 && annotate.has_br_stack) {
+		sort__mode = SORT_MODE__BRANCH;
+		if (setup_sorting(annotate.session->evlist) < 0)
+			usage_with_options(annotate_usage, options);
+	} else {
+		if (setup_sorting(NULL) < 0)
+			usage_with_options(annotate_usage, options);
+	}
+
 	ret = __cmd_annotate(&annotate);

 out_delete:
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 539c3d460158..98d243fa0c06 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -237,9 +237,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 	if (mi == NULL)
 		return -ENOMEM;

-	mi_dup = memdup(mi, sizeof(*mi));
-	if (!mi_dup)
-		goto free_mi;
+	/*
+	 * The mi object is released in hists__add_entry_ops,
+	 * if it gets sorted out into existing data, so we need
+	 * to take the copy now.
+	 */
+	mi_dup = mem_info__get(mi);

 	c2c_decode_stats(&stats, mi);

@@ -247,7 +250,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				  &al, NULL, NULL, mi,
 				  sample, true);
 	if (he == NULL)
-		goto free_mi_dup;
+		goto free_mi;

 	c2c_he = container_of(he, struct c2c_hist_entry, he);
 	c2c_add_stats(&c2c_he->stats, &stats);
@@ -272,19 +275,15 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,

 	mi = mi_dup;

-	mi_dup = memdup(mi, sizeof(*mi));
-	if (!mi_dup)
-		goto free_mi;
-
 	c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2);
 	if (!c2c_hists)
-		goto free_mi_dup;
+		goto free_mi;

 	he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
 				  &al, NULL, NULL, mi,
 				  sample, true);
 	if (he == NULL)
-		goto free_mi_dup;
+		goto free_mi;

 	c2c_he = container_of(he, struct c2c_hist_entry, he);
 	c2c_add_stats(&c2c_he->stats, &stats);
@@ -303,10 +302,9 @@ out:
 	addr_location__put(&al);
 	return ret;

-free_mi_dup:
-	free(mi_dup);
 free_mi:
-	free(mi);
+	mem_info__put(mi_dup);
+	mem_info__put(mi);
 	ret = -ENOMEM;
 	goto out;
 }
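The error path collapses nicely because mem_info__put(), like the other
__put() helpers in tools/perf, is expected to tolerate whatever state the
pointer is in at that point. A minimal standalone sketch of the get/put
refcount idiom this commit moves 'struct mem_info' onto (names and layout
here are illustrative, not the actual perf implementation):

	#include <stdio.h>
	#include <stdlib.h>

	struct obj {
		int refcnt;        /* perf uses refcount_t; plain int for the sketch */
	};

	static struct obj *obj__new(void)
	{
		struct obj *o = calloc(1, sizeof(*o));

		if (o)
			o->refcnt = 1;  /* creator owns the first reference */
		return o;
	}

	static struct obj *obj__get(struct obj *o)
	{
		if (o)
			o->refcnt++;    /* each holder takes its own reference */
		return o;
	}

	static void obj__put(struct obj *o)
	{
		if (o && --o->refcnt == 0)
			free(o);        /* last put frees; earlier puts are no-ops */
	}

	int main(void)
	{
		struct obj *mi = obj__new();
		struct obj *mi_dup = obj__get(mi); /* like mem_info__get(mi) above */

		obj__put(mi);       /* e.g. the hists code dropping its reference */
		obj__put(mi_dup);   /* final put releases the object */
		printf("done\n");
		return 0;
	}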
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index d2703d3b8366..72e2ca096bf5 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -746,21 +746,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
 	struct perf_evlist *evlist = kvm->evlist;
 	union perf_event *event;
 	struct perf_mmap *md;
-	u64 end, start;
 	u64 timestamp;
 	s64 n = 0;
 	int err;

 	*mmap_time = ULLONG_MAX;
 	md = &evlist->mmap[idx];
-	err = perf_mmap__read_init(md, false, &start, &end);
+	err = perf_mmap__read_init(md);
 	if (err < 0)
 		return (err == -EAGAIN) ? 0 : -1;

-	while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+	while ((event = perf_mmap__read_event(md)) != NULL) {
 		err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
 		if (err) {
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 			pr_err("Failed to parse sample\n");
 			return -1;
 		}
@@ -770,7 +769,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
 	 * FIXME: Here we can't consume the event, as perf_session__queue_event will
 	 * point to it, and it'll get possibly overwritten by the kernel.
 	 */
-	perf_mmap__consume(md, false);
+	perf_mmap__consume(md);

 	if (err) {
 		pr_err("Failed to enqueue sample: %d\n", err);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 12230ddb6506..b81494587120 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -71,7 +71,6 @@ struct record {
 	struct auxtrace_record *itr;
 	struct perf_evlist *evlist;
 	struct perf_session *session;
-	const char *progname;
 	int realtime_prio;
 	bool no_buildid;
 	bool no_buildid_set;
@@ -274,6 +273,24 @@ static void record__read_auxtrace_snapshot(struct record *rec)
 	}
 }

+static int record__auxtrace_init(struct record *rec)
+{
+	int err;
+
+	if (!rec->itr) {
+		rec->itr = auxtrace_record__init(rec->evlist, &err);
+		if (err)
+			return err;
+	}
+
+	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
+					      rec->opts.auxtrace_snapshot_opts);
+	if (err)
+		return err;
+
+	return auxtrace_parse_filters(rec->evlist);
+}
+
 #else

 static inline
@@ -294,6 +311,11 @@ int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
 	return 0;
 }

+static int record__auxtrace_init(struct record *rec __maybe_unused)
+{
+	return 0;
+}
+
 #endif

 static int record__mmap_evlist(struct record *rec,
@@ -510,7 +532,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
 		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

 		if (maps[i].base) {
-			if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
+			if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
 				rc = -1;
 				goto out;
 			}
@@ -831,7 +853,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	int status = 0;
 	unsigned long waking = 0;
 	const bool forks = argc > 0;
-	struct machine *machine;
 	struct perf_tool *tool = &rec->tool;
 	struct record_opts *opts = &rec->opts;
 	struct perf_data *data = &rec->data;
@@ -839,8 +860,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	bool disabled = false, draining = false;
 	int fd;

-	rec->progname = argv[0];
-
 	atexit(record__sig_exit);
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
@@ -936,8 +955,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		goto out_child;
 	}

-	machine = &session->machines.host;
-
 	err = record__synthesize(rec, false);
 	if (err < 0)
 		goto out_child;
@@ -965,6 +982,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	 * Let the child rip
 	 */
 	if (forks) {
+		struct machine *machine = &session->machines.host;
 		union perf_event *event;
 		pid_t tgid;

@@ -1727,17 +1745,6 @@ int cmd_record(int argc, const char **argv)
 		alarm(rec->switch_output.time);
 	}

-	if (!rec->itr) {
-		rec->itr = auxtrace_record__init(rec->evlist, &err);
-		if (err)
-			goto out;
-	}
-
-	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
-					      rec->opts.auxtrace_snapshot_opts);
-	if (err)
-		goto out;
-
 	/*
 	 * Allow aliases to facilitate the lookup of symbols for address
 	 * filters. Refer to auxtrace_parse_filters().
@@ -1746,7 +1753,7 @@ int cmd_record(int argc, const char **argv)

 	symbol__init(NULL);

-	err = auxtrace_parse_filters(rec->evlist);
+	err = record__auxtrace_init(rec);
 	if (err)
 		goto out;

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1eedb1815c4c..971ccba85464 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -400,8 +400,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report

 	nr_samples = convert_unit(nr_samples, &unit);
 	ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
-	if (evname != NULL)
-		ret += fprintf(fp, " of event '%s'", evname);
+	if (evname != NULL) {
+		ret += fprintf(fp, " of event%s '%s'",
+			       evsel->nr_members > 1 ? "s" : "", evname);
+	}

 	if (rep->time_str)
 		ret += fprintf(fp, " (time slices: %s)", rep->time_str);
@@ -1175,8 +1177,17 @@ repeat:
 	has_br_stack = perf_header__has_feat(&session->header,
 					     HEADER_BRANCH_STACK);

-	if (group_set && !session->evlist->nr_groups)
+	/*
+	 * Events in data file are not collect in groups, but we still want
+	 * the group display. Set the artificial group and set the leader's
+	 * forced_leader flag to notify the display code.
+	 */
+	if (group_set && !session->evlist->nr_groups) {
+		struct perf_evsel *leader = perf_evlist__first(session->evlist);
+
 		perf_evlist__set_leader(session->evlist);
+		leader->forced_leader = true;
+	}

 	if (itrace_synth_opts.last_branch)
 		has_br_stack = true;
@@ -1337,6 +1348,15 @@ repeat:
 		report.range_num = 1;
 	}

+	if (session->tevent.pevent &&
+	    pevent_set_function_resolver(session->tevent.pevent,
+					 machine__resolve_kernel_addr,
+					 &session->machines.host) < 0) {
+		pr_err("%s: failed to set libtraceevent function resolver\n",
+		       __func__);
+		return -1;
+	}
+
 	sort__setup_elide(stdout);

 	ret = __cmd_report(&report);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 83283fedb00f..4dfdee668b0c 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -254,6 +254,10 @@ struct thread_runtime {
 	u64 total_delay_time;

 	int last_state;
+
+	char shortname[3];
+	bool comm_changed;
+
 	u64 migrations;
 };

@@ -897,6 +901,37 @@ struct sort_dimension {
 	struct list_head list;
 };

+/*
+ * handle runtime stats saved per thread
+ */
+static struct thread_runtime *thread__init_runtime(struct thread *thread)
+{
+	struct thread_runtime *r;
+
+	r = zalloc(sizeof(struct thread_runtime));
+	if (!r)
+		return NULL;
+
+	init_stats(&r->run_stats);
+	thread__set_priv(thread, r);
+
+	return r;
+}
+
+static struct thread_runtime *thread__get_runtime(struct thread *thread)
+{
+	struct thread_runtime *tr;
+
+	tr = thread__priv(thread);
+	if (tr == NULL) {
+		tr = thread__init_runtime(thread);
+		if (tr == NULL)
+			pr_debug("Failed to malloc memory for runtime data.\n");
+	}
+
+	return tr;
+}
+
 static int
 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
 {
@@ -1480,6 +1515,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 {
 	const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
 	struct thread *sched_in;
+	struct thread_runtime *tr;
 	int new_shortname;
 	u64 timestamp0, timestamp = sample->time;
 	s64 delta;
@@ -1519,22 +1555,28 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	if (sched_in == NULL)
 		return -1;

+	tr = thread__get_runtime(sched_in);
+	if (tr == NULL) {
+		thread__put(sched_in);
+		return -1;
+	}
+
 	sched->curr_thread[this_cpu] = thread__get(sched_in);

 	printf("  ");

 	new_shortname = 0;
-	if (!sched_in->shortname[0]) {
+	if (!tr->shortname[0]) {
 		if (!strcmp(thread__comm_str(sched_in), "swapper")) {
 			/*
 			 * Don't allocate a letter-number for swapper:0
 			 * as a shortname. Instead, we use '.' for it.
 			 */
-			sched_in->shortname[0] = '.';
-			sched_in->shortname[1] = ' ';
+			tr->shortname[0] = '.';
+			tr->shortname[1] = ' ';
 		} else {
-			sched_in->shortname[0] = sched->next_shortname1;
-			sched_in->shortname[1] = sched->next_shortname2;
+			tr->shortname[0] = sched->next_shortname1;
+			tr->shortname[1] = sched->next_shortname2;

 			if (sched->next_shortname1 < 'Z') {
 				sched->next_shortname1++;
@@ -1552,6 +1594,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	for (i = 0; i < cpus_nr; i++) {
 		int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
 		struct thread *curr_thread = sched->curr_thread[cpu];
+		struct thread_runtime *curr_tr;
 		const char *pid_color = color;
 		const char *cpu_color = color;

@@ -1569,9 +1612,14 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 		else
 			color_fprintf(stdout, cpu_color, "*");

-		if (sched->curr_thread[cpu])
-			color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname);
-		else
+		if (sched->curr_thread[cpu]) {
+			curr_tr = thread__get_runtime(sched->curr_thread[cpu]);
+			if (curr_tr == NULL) {
+				thread__put(sched_in);
+				return -1;
+			}
+			color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname);
+		} else
 			color_fprintf(stdout, color, "   ");
 	}

@@ -1580,14 +1628,15 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,

 	timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
 	color_fprintf(stdout, color, "  %12s secs ", stimestamp);
-	if (new_shortname || (verbose > 0 && sched_in->tid)) {
+	if (new_shortname || tr->comm_changed || (verbose > 0 && sched_in->tid)) {
 		const char *pid_color = color;

 		if (thread__has_color(sched_in))
 			pid_color = COLOR_PIDS;

 		color_fprintf(stdout, pid_color, "%s => %s:%d",
-			      sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
+			      tr->shortname, thread__comm_str(sched_in), sched_in->tid);
+		tr->comm_changed = false;
 	}

 	if (sched->map.comp && new_cpu)
@@ -1691,6 +1740,37 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
 	return err;
 }

+static int perf_sched__process_comm(struct perf_tool *tool __maybe_unused,
+				    union perf_event *event,
+				    struct perf_sample *sample,
+				    struct machine *machine)
+{
+	struct thread *thread;
+	struct thread_runtime *tr;
+	int err;
+
+	err = perf_event__process_comm(tool, event, sample, machine);
+	if (err)
+		return err;
+
+	thread = machine__find_thread(machine, sample->pid, sample->tid);
+	if (!thread) {
+		pr_err("Internal error: can't find thread\n");
+		return -1;
+	}
+
+	tr = thread__get_runtime(thread);
+	if (tr == NULL) {
+		thread__put(thread);
+		return -1;
+	}
+
+	tr->comm_changed = true;
+	thread__put(thread);
+
+	return 0;
+}
+
 static int perf_sched__read_events(struct perf_sched *sched)
 {
 	const struct perf_evsel_str_handler handlers[] = {
@@ -2200,37 +2280,6 @@ static void save_idle_callchain(struct idle_thread_runtime *itr,
 	callchain_cursor__copy(&itr->cursor, &callchain_cursor);
 }

-/*
- * handle runtime stats saved per thread
- */
-static struct thread_runtime *thread__init_runtime(struct thread *thread)
-{
-	struct thread_runtime *r;
-
-	r = zalloc(sizeof(struct thread_runtime));
-	if (!r)
-		return NULL;
-
-	init_stats(&r->run_stats);
-	thread__set_priv(thread, r);
-
-	return r;
-}
-
-static struct thread_runtime *thread__get_runtime(struct thread *thread)
-{
-	struct thread_runtime *tr;
-
-	tr = thread__priv(thread);
-	if (tr == NULL) {
-		tr = thread__init_runtime(thread);
-		if (tr == NULL)
-			pr_debug("Failed to malloc memory for runtime data.\n");
-	}
-
-	return tr;
-}
-
 static struct thread *timehist_get_thread(struct perf_sched *sched,
 					  struct perf_sample *sample,
 					  struct machine *machine,
@@ -3291,7 +3340,7 @@ int cmd_sched(int argc, const char **argv)
 	struct perf_sched sched = {
 		.tool = {
 			.sample	 = perf_sched__process_tracepoint_sample,
-			.comm	 = perf_event__process_comm,
+			.comm	 = perf_sched__process_comm,
 			.namespaces = perf_event__process_namespaces,
 			.lost	 = perf_event__process_lost,
 			.fork	 = perf_sched__process_fork_event,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3a022b3e5c02..0fa9ea3a6d92 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1251,6 +1251,31 @@ static void aggr_update_shadow(void)
 	}
 }

+static void uniquify_event_name(struct perf_evsel *counter)
+{
+	char *new_name;
+	char *config;
+
+	if (!counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
+					   strlen(counter->pmu_name)))
+		return;
+
+	config = strchr(counter->name, '/');
+	if (config) {
+		if (asprintf(&new_name,
+			     "%s%s", counter->pmu_name, config) > 0) {
+			free(counter->name);
+			counter->name = new_name;
+		}
+	} else {
+		if (asprintf(&new_name,
+			     "%s [%s]", counter->name, counter->pmu_name) > 0) {
+			free(counter->name);
+			counter->name = new_name;
+		}
+	}
+}
+
 static void collect_all_aliases(struct perf_evsel *counter,
 				void (*cb)(struct perf_evsel *counter, void *data,
 					   bool first),
@@ -1279,7 +1304,9 @@ static bool collect_data(struct perf_evsel *counter,
 	if (counter->merged_stat)
 		return false;
 	cb(counter, data, true);
-	if (!no_merge && counter->auto_merge_stats)
+	if (no_merge)
+		uniquify_event_name(counter);
+	else if (counter->auto_merge_stats)
 		collect_all_aliases(counter, cb, data);
 	return true;
 }
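uniquify_event_name() makes the unmerged rows distinguishable by grafting the
instance's PMU name onto the event: a name with a /config/ part gets the PMU
name as a prefix, anything else gets it appended in brackets. So under
--no-merge a wildcarded imc/cas_count_read/ would plausibly show up per
instance as, e.g.:

	uncore_imc_0/cas_count_read/
	uncore_imc_1/cas_count_read/

(the uncore_imc_N instance names are assumed for illustration; in practice
they come from counter->pmu_name on the running system).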
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index bb4f9fafd11d..0a26b56afcc5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -817,14 +817,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 	struct perf_session *session = top->session;
 	union perf_event *event;
 	struct machine *machine;
-	u64 end, start;
 	int ret;

 	md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
-	if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0)
+	if (perf_mmap__read_init(md) < 0)
 		return;

-	while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) {
+	while ((event = perf_mmap__read_event(md)) != NULL) {
 		ret = perf_evlist__parse_sample(evlist, event, &sample);
 		if (ret) {
 			pr_err("Can't parse sample, err = %d\n", ret);
@@ -879,7 +878,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 		} else
 			++session->evlist->stats.nr_unknown_events;
 next_event:
-		perf_mmap__consume(md, opts->overwrite);
+		perf_mmap__consume(md);
 	}

 	perf_mmap__read_done(md);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1a93debc1e8d..87b95c9410b4 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -19,6 +19,7 @@
 #include <traceevent/event-parse.h>
 #include <api/fs/tracing_path.h>
 #include "builtin.h"
+#include "util/cgroup.h"
 #include "util/color.h"
 #include "util/debug.h"
 #include "util/env.h"
@@ -83,6 +84,7 @@ struct trace {
 	struct perf_evlist *evlist;
 	struct machine *host;
 	struct thread *current;
+	struct cgroup *cgroup;
 	u64 base_time;
 	FILE *output;
 	unsigned long nr_events;
@@ -2370,6 +2372,34 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 					trace__sched_stat_runtime))
 		goto out_error_sched_stat_runtime;

+	/*
+	 * If a global cgroup was set, apply it to all the events without an
+	 * explicit cgroup. I.e.:
+	 *
+	 *	trace -G A -e sched:*switch
+	 *
+	 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
+	 * _and_ sched:sched_switch to the 'A' cgroup, while:
+	 *
+	 *	trace -e sched:*switch -G A
+	 *
+	 * will only set the sched:sched_switch event to the 'A' cgroup, all the
+	 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
+	 * a cgroup (on the root cgroup, sys wide, etc).
+	 *
+	 * Multiple cgroups:
+	 *
+	 *	trace -G A -e sched:*switch -G B
+	 *
+	 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
+	 * to the 'B' cgroup.
+	 *
+	 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
+	 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
+	 */
+	if (trace->cgroup)
+		evlist__set_default_cgroup(trace->evlist, trace->cgroup);
+
 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
 	if (err < 0) {
 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
@@ -2473,13 +2503,12 @@ again:
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		union perf_event *event;
 		struct perf_mmap *md;
-		u64 end, start;

 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;

-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			struct perf_sample sample;

 			++trace->nr_events;
@@ -2492,7 +2521,7 @@ again:

 			trace__handle_event(trace, event, &sample);
 next_event:
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);

 			if (interrupted)
 				goto out_disable;
@@ -2540,6 +2569,7 @@ out_delete_evlist:
2540 trace__symbols__exit(trace); 2569 trace__symbols__exit(trace);
2541 2570
2542 perf_evlist__delete(evlist); 2571 perf_evlist__delete(evlist);
2572 cgroup__put(trace->cgroup);
2543 trace->evlist = NULL; 2573 trace->evlist = NULL;
2544 trace->live = false; 2574 trace->live = false;
2545 return err; 2575 return err;
@@ -2979,6 +3009,18 @@ out:
2979 return err; 3009 return err;
2980} 3010}
2981 3011
3012static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
3013{
3014 struct trace *trace = opt->value;
3015
3016 if (!list_empty(&trace->evlist->entries))
3017 return parse_cgroups(opt, str, unset);
3018
3019 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
3020
3021 return 0;
3022}
3023
2982int cmd_trace(int argc, const char **argv) 3024int cmd_trace(int argc, const char **argv)
2983{ 3025{
2984 const char *trace_usage[] = { 3026 const char *trace_usage[] = {
@@ -3069,6 +3111,8 @@ int cmd_trace(int argc, const char **argv)
3069 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"), 3111 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
3070 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, 3112 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3071 "per thread proc mmap processing timeout in ms"), 3113 "per thread proc mmap processing timeout in ms"),
3114 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
3115 trace__parse_cgroups),
3072 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, 3116 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
3073 "ms to wait before starting measurement after program " 3117 "ms to wait before starting measurement after program "
3074 "start"), 3118 "start"),
@@ -3095,6 +3139,11 @@ int cmd_trace(int argc, const char **argv)
3095 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, 3139 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3096 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); 3140 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3097 3141
3142 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
3143 usage_with_options_msg(trace_usage, trace_options,
3144 "cgroup monitoring only available in system-wide mode");
3145 }
3146
3098 err = bpf__setup_stdout(trace.evlist); 3147 err = bpf__setup_stdout(trace.evlist);
3099 if (err) { 3148 if (err) {
3100 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf)); 3149 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
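The same mechanical change repeats across the tools and tests below: the overwrite mode and the start/end read positions now live inside struct perf_mmap, so readers no longer thread them through every call. A minimal sketch of the resulting loop, using the post-change signatures from this series (drain_all_mmaps() is a hypothetical helper, not code from the patch):

	/*
	 * Sketch of the simplified ring-buffer read idiom after this series.
	 * perf_mmap__read_init() snapshots the ring head/tail into the map,
	 * perf_mmap__consume() advances the read pointer, and
	 * perf_mmap__read_done() publishes the new tail for overwrite mode.
	 */
	static void drain_all_mmaps(struct perf_evlist *evlist)
	{
		int i;

		for (i = 0; i < evlist->nr_mmaps; i++) {
			struct perf_mmap *md = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(md) < 0)
				continue;	/* nothing new in this ring */

			while ((event = perf_mmap__read_event(md)) != NULL) {
				/* ... handle 'event' here ... */
				perf_mmap__consume(md);
			}

			perf_mmap__read_done(md);
		}
	}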
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index e0b1b414d466..6d598cc071ae 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -33,10 +33,9 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		struct perf_mmap *map = &evlist->overwrite_mmap[i];
 		union perf_event *event;
-		u64 start, end;
 
-		perf_mmap__read_init(map, true, &start, &end);
-		while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) {
+		perf_mmap__read_init(map);
+		while ((event = perf_mmap__read_event(map)) != NULL) {
 			const u32 type = event->header.type;
 
 			switch (type) {
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 09c9c9f9e827..79b54f8ddebf 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -177,13 +177,12 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		union perf_event *event;
 		struct perf_mmap *md;
-		u64 end, start;
 
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			const u32 type = event->header.type;
 
 			if (type == PERF_RECORD_SAMPLE)
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 03ed8c77b1bb..99936352df4f 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -410,17 +410,16 @@ static int process_events(struct machine *machine, struct perf_evlist *evlist,
 {
 	union perf_event *event;
 	struct perf_mmap *md;
-	u64 end, start;
 	int i, ret;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			ret = process_event(machine, evlist, event, state);
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 			if (ret < 0)
 				return ret;
 		}
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 4590d8fb91ab..17c46f3e6f1e 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -28,21 +28,20 @@ static int find_comm(struct perf_evlist *evlist, const char *comm)
 {
 	union perf_event *event;
 	struct perf_mmap *md;
-	u64 end, start;
 	int i, found;
 
 	found = 0;
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			if (event->header.type == PERF_RECORD_COMM &&
 			    (pid_t)event->comm.pid == getpid() &&
 			    (pid_t)event->comm.tid == getpid() &&
 			    strcmp(event->comm.comm, comm) == 0)
 				found += 1;
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 		}
 		perf_mmap__read_done(md);
 	}
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 44c58d69cd87..bb8e6bcb0d96 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -39,7 +39,6 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 	struct perf_evsel *evsels[nsyscalls], *evsel;
 	char sbuf[STRERR_BUFSIZE];
 	struct perf_mmap *md;
-	u64 end, start;
 
 	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	if (threads == NULL) {
@@ -109,10 +108,10 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 	}
 
 	md = &evlist->mmap[0];
-	if (perf_mmap__read_init(md, false, &start, &end) < 0)
+	if (perf_mmap__read_init(md) < 0)
 		goto out_init;
 
-	while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+	while ((event = perf_mmap__read_event(md)) != NULL) {
 		struct perf_sample sample;
 
 		if (event->header.type != PERF_RECORD_SAMPLE) {
@@ -135,7 +134,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 			goto out_delete_evlist;
 		}
 		nr_events[evsel->idx]++;
-		perf_mmap__consume(md, false);
+		perf_mmap__consume(md);
 	}
 	perf_mmap__read_done(md);
 
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 620b21023f72..344dc3ac2469 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -87,13 +87,12 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		union perf_event *event;
 		struct perf_mmap *md;
-		u64 end, start;
 
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			const u32 type = event->header.type;
 			int tp_flags;
 			struct perf_sample sample;
@@ -101,7 +100,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
 			++nr_events;
 
 			if (type != PERF_RECORD_SAMPLE) {
-				perf_mmap__consume(md, false);
+				perf_mmap__consume(md);
 				continue;
 			}
 
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 31f3f70adca6..34394cc05077 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -165,13 +165,12 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		union perf_event *event;
 		struct perf_mmap *md;
-		u64 end, start;
 
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			const u32 type = event->header.type;
 			const char *name = perf_event__name(type);
 
@@ -272,7 +271,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 				++errs;
 			}
 
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 		}
 		perf_mmap__read_done(md);
 	}
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index e6320e267ba5..f9490b237893 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -40,7 +40,6 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
 	struct cpu_map *cpus;
 	struct thread_map *threads;
 	struct perf_mmap *md;
-	u64 end, start;
 
 	attr.sample_freq = 500;
 
@@ -96,10 +95,10 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
 	perf_evlist__disable(evlist);
 
 	md = &evlist->mmap[0];
-	if (perf_mmap__read_init(md, false, &start, &end) < 0)
+	if (perf_mmap__read_init(md) < 0)
 		goto out_init;
 
-	while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+	while ((event = perf_mmap__read_event(md)) != NULL) {
 		struct perf_sample sample;
 
 		if (event->header.type != PERF_RECORD_SAMPLE)
@@ -114,7 +113,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
 		total_periods += sample.period;
 		nr_samples++;
 next_event:
-		perf_mmap__consume(md, false);
+		perf_mmap__consume(md);
 	}
 	perf_mmap__read_done(md);
 
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 10c4dcdc2324..9b5be51e5e7b 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -259,18 +259,17 @@ static int process_events(struct perf_evlist *evlist,
 	LIST_HEAD(events);
 	struct event_node *events_array, *node;
 	struct perf_mmap *md;
-	u64 end, start;
 	int i, ret;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			cnt += 1;
 			ret = add_event(evlist, &events, event);
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 			if (ret < 0)
 				goto out_free_nodes;
 		}
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 02b0888b72a3..e92fa6029ac7 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -48,7 +48,6 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 	struct cpu_map *cpus;
 	struct thread_map *threads;
 	struct perf_mmap *md;
-	u64 end, start;
 
 	signal(SIGCHLD, sig_handler);
 
@@ -113,14 +112,14 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 
 retry:
 	md = &evlist->mmap[0];
-	if (perf_mmap__read_init(md, false, &start, &end) < 0)
+	if (perf_mmap__read_init(md) < 0)
 		goto out_init;
 
-	while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+	while ((event = perf_mmap__read_event(md)) != NULL) {
 		if (event->header.type == PERF_RECORD_EXIT)
 			nr_exit++;
 
-		perf_mmap__consume(md, false);
+		perf_mmap__consume(md);
 	}
 	perf_mmap__read_done(md);
 
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index de2bde232cb3..8b4e82548f8e 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2261,8 +2261,9 @@ static int perf_evsel_browser_title(struct hist_browser *browser,
 
 	nr_samples = convert_unit(nr_samples, &unit);
 	printed = scnprintf(bf, size,
-			   "Samples: %lu%c of event '%s',%s%sEvent count (approx.): %" PRIu64,
-			   nr_samples, unit, ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+			   "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+			   nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
+			   ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
 
 
 	if (hists->uid_filter_str)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 49ff825f745c..bc3302da702b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -248,7 +248,7 @@ static struct ins_ops call_ops = {
 
 bool ins__is_call(const struct ins *ins)
 {
-	return ins->ops == &call_ops;
+	return ins->ops == &call_ops || ins->ops == &s390_call_ops;
 }
 
 static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused)
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 6470ea2aa25e..fb357a00dd86 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -233,9 +233,9 @@ static void *auxtrace_copy_data(u64 size, struct perf_session *session)
 	return p;
 }
 
-static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
-				       unsigned int idx,
-				       struct auxtrace_buffer *buffer)
+static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
+					 unsigned int idx,
+					 struct auxtrace_buffer *buffer)
 {
 	struct auxtrace_queue *queue;
 	int err;
@@ -286,7 +286,7 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
 			return -ENOMEM;
 		b->size = BUFFER_LIMIT_FOR_32_BIT;
 		b->consecutive = consecutive;
-		err = auxtrace_queues__add_buffer(queues, idx, b);
+		err = auxtrace_queues__queue_buffer(queues, idx, b);
 		if (err) {
 			auxtrace_buffer__free(b);
 			return err;
@@ -302,11 +302,14 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
 	return 0;
 }
 
-static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
-					     struct perf_session *session,
-					     unsigned int idx,
-					     struct auxtrace_buffer *buffer)
+static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
+				       struct perf_session *session,
+				       unsigned int idx,
+				       struct auxtrace_buffer *buffer,
+				       struct auxtrace_buffer **buffer_ptr)
 {
+	int err;
+
 	if (session->one_mmap) {
 		buffer->data = buffer->data_offset - session->one_mmap_offset +
 			       session->one_mmap_addr;
@@ -317,14 +320,20 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
 		buffer->data_needs_freeing = true;
 	} else if (BITS_PER_LONG == 32 &&
 		   buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
-		int err;
-
 		err = auxtrace_queues__split_buffer(queues, idx, buffer);
 		if (err)
 			return err;
 	}
 
-	return auxtrace_queues__add_buffer(queues, idx, buffer);
+	err = auxtrace_queues__queue_buffer(queues, idx, buffer);
+	if (err)
+		return err;
+
+	/* FIXME: Doesn't work for split buffer */
+	if (buffer_ptr)
+		*buffer_ptr = buffer;
+
+	return 0;
 }
 
 static bool filter_cpu(struct perf_session *session, int cpu)
@@ -359,13 +368,11 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
 	buffer->size = event->auxtrace.size;
 	idx = event->auxtrace.idx;
 
-	err = auxtrace_queues__add_event_buffer(queues, session, idx, buffer);
+	err = auxtrace_queues__add_buffer(queues, session, idx, buffer,
+					  buffer_ptr);
 	if (err)
 		goto out_err;
 
-	if (buffer_ptr)
-		*buffer_ptr = buffer;
-
 	return 0;
 
 out_err:
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 453c148d2158..e731f55da072 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -130,6 +130,7 @@ struct auxtrace_index {
 /**
  * struct auxtrace - session callbacks to allow AUX area data decoding.
  * @process_event: lets the decoder see all session events
+ * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event
  * @flush_events: process any remaining data
  * @free_events: free resources associated with event processing
  * @free: free resources associated with the session
@@ -301,6 +302,7 @@ struct auxtrace_mmap_params {
 * @parse_snapshot_options: parse snapshot options
 * @reference: provide a 64-bit reference number for auxtrace_event
 * @read_finish: called after reading from an auxtrace mmap
+ * @alignment: alignment (if any) for AUX area data
 */
 struct auxtrace_record {
 	int (*recording_options)(struct auxtrace_record *itr,
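The rename above untangles two jobs that previously shared one name: fetching or copying an AUX buffer's data versus actually linking it into a queue. A rough sketch of the call flow after this change (comments only, not verbatim perf code):

	/*
	 * auxtrace_queues__add_event()
	 *   -> auxtrace_queues__add_buffer(queues, session, idx, buffer, &buffer_ptr)
	 *        - maps or copies the buffer data (one_mmap fast path, or
	 *          auxtrace_copy_data() with data_needs_freeing set)
	 *        - may split oversized buffers on 32-bit via
	 *          auxtrace_queues__split_buffer()
	 *        -> auxtrace_queues__queue_buffer()  // links the buffer into its queue
	 *        - on success reports the queued buffer through *buffer_ptr
	 *          (noted in the code as not yet working for split buffers)
	 */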
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 5dd9b5ea314d..78408f5c4bad 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -71,7 +71,7 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
 	return -1;
 }
 
-static int open_cgroup(char *name)
+static int open_cgroup(const char *name)
 {
 	char path[PATH_MAX + 1];
 	char mnt[PATH_MAX + 1];
@@ -90,41 +90,64 @@ static int open_cgroup(char *name)
 	return fd;
 }
 
-static int add_cgroup(struct perf_evlist *evlist, char *str)
+static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str)
 {
 	struct perf_evsel *counter;
-	struct cgroup_sel *cgrp = NULL;
-	int n;
+	struct cgroup *cgrp = NULL;
 	/*
 	 * check if cgrp is already defined, if so we reuse it
 	 */
 	evlist__for_each_entry(evlist, counter) {
-		cgrp = counter->cgrp;
-		if (!cgrp)
+		if (!counter->cgrp)
 			continue;
-		if (!strcmp(cgrp->name, str)) {
-			refcount_inc(&cgrp->refcnt);
+		if (!strcmp(counter->cgrp->name, str)) {
+			cgrp = cgroup__get(counter->cgrp);
 			break;
 		}
-
-		cgrp = NULL;
 	}
 
-	if (!cgrp) {
-		cgrp = zalloc(sizeof(*cgrp));
-		if (!cgrp)
-			return -1;
+	return cgrp;
+}
+
+static struct cgroup *cgroup__new(const char *name)
+{
+	struct cgroup *cgroup = zalloc(sizeof(*cgroup));
 
-		cgrp->name = str;
-		refcount_set(&cgrp->refcnt, 1);
+	if (cgroup != NULL) {
+		refcount_set(&cgroup->refcnt, 1);
 
-		cgrp->fd = open_cgroup(str);
-		if (cgrp->fd == -1) {
-			free(cgrp);
-			return -1;
-		}
+		cgroup->name = strdup(name);
+		if (!cgroup->name)
+			goto out_err;
+		cgroup->fd = open_cgroup(name);
+		if (cgroup->fd == -1)
+			goto out_free_name;
 	}
 
+	return cgroup;
+
+out_free_name:
+	free(cgroup->name);
+out_err:
+	free(cgroup);
+	return NULL;
+}
+
+struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name)
+{
+	struct cgroup *cgroup = evlist__find_cgroup(evlist, name);
+
+	return cgroup ?: cgroup__new(name);
+}
+
+static int add_cgroup(struct perf_evlist *evlist, const char *str)
+{
+	struct perf_evsel *counter;
+	struct cgroup *cgrp = evlist__findnew_cgroup(evlist, str);
+	int n;
+
+	if (!cgrp)
+		return -1;
 	/*
 	 * find corresponding event
 	 * if add cgroup N, then need to find event N
@@ -135,30 +158,55 @@ static int add_cgroup(struct perf_evlist *evlist, const char *str)
 			goto found;
 		n++;
 	}
-	if (refcount_dec_and_test(&cgrp->refcnt))
-		free(cgrp);
 
+	cgroup__put(cgrp);
 	return -1;
 found:
 	counter->cgrp = cgrp;
 	return 0;
 }
 
-void close_cgroup(struct cgroup_sel *cgrp)
+static void cgroup__delete(struct cgroup *cgroup)
+{
+	close(cgroup->fd);
+	zfree(&cgroup->name);
+	free(cgroup);
+}
+
+void cgroup__put(struct cgroup *cgrp)
 {
 	if (cgrp && refcount_dec_and_test(&cgrp->refcnt)) {
-		close(cgrp->fd);
-		zfree(&cgrp->name);
-		free(cgrp);
+		cgroup__delete(cgrp);
 	}
 }
 
-int parse_cgroups(const struct option *opt __maybe_unused, const char *str,
+struct cgroup *cgroup__get(struct cgroup *cgroup)
+{
+	if (cgroup)
+		refcount_inc(&cgroup->refcnt);
+	return cgroup;
+}
+
+static void evsel__set_default_cgroup(struct perf_evsel *evsel, struct cgroup *cgroup)
+{
+	if (evsel->cgrp == NULL)
+		evsel->cgrp = cgroup__get(cgroup);
+}
+
+void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__set_default_cgroup(evsel, cgroup);
+}
+
+int parse_cgroups(const struct option *opt, const char *str,
 		  int unset __maybe_unused)
 {
 	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
 	struct perf_evsel *counter;
-	struct cgroup_sel *cgrp = NULL;
+	struct cgroup *cgrp = NULL;
 	const char *p, *e, *eos = str + strlen(str);
 	char *s;
 	int ret, i;
@@ -179,10 +227,9 @@ int parse_cgroups(const struct option *opt, const char *str,
 		if (!s)
 			return -1;
 		ret = add_cgroup(evlist, s);
-		if (ret) {
-			free(s);
+		free(s);
+		if (ret)
 			return -1;
-		}
 	}
 	/* nr_cgroups is increased een for empty cgroups */
 	nr_cgroups++;
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index afafc87e9201..f033a80c1b14 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -6,7 +6,7 @@
 
 struct option;
 
-struct cgroup_sel {
+struct cgroup {
 	char *name;
 	int fd;
 	refcount_t refcnt;
@@ -14,7 +14,16 @@ struct cgroup {
 
 
 extern int nr_cgroups; /* number of explicit cgroups defined */
-void close_cgroup(struct cgroup_sel *cgrp);
+
+struct cgroup *cgroup__get(struct cgroup *cgroup);
+void cgroup__put(struct cgroup *cgroup);
+
+struct perf_evlist;
+
+struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name);
+
+void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup);
+
 int parse_cgroups(const struct option *opt, const char *str, int unset);
 
 #endif /* __CGROUP_H__ */
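Taken together, the two files above define a small refcounted API. A hedged sketch of the ownership rules (pin_events_to_cgroup() is a hypothetical helper, but the contract follows from the code above: evlist__findnew_cgroup() returns a reference owned by the caller, and each evsel takes its own reference via cgroup__get()):

	/*
	 * Illustration of the refcount contract: the cgroup returned by
	 * evlist__findnew_cgroup() carries one reference owned by the
	 * caller, which must eventually be dropped with cgroup__put().
	 */
	static int pin_events_to_cgroup(struct perf_evlist *evlist, const char *name)
	{
		struct cgroup *cgrp = evlist__findnew_cgroup(evlist, name);

		if (cgrp == NULL)
			return -1;

		/* Each evsel without a cgroup grabs its own reference. */
		evlist__set_default_cgroup(evlist, cgrp);

		cgroup__put(cgrp);	/* drop the caller's reference */
		return 0;
	}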
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index bf970f57dce0..c4ef2e523367 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -27,6 +27,12 @@ struct numa_node {
 	struct cpu_map *map;
 };
 
+struct memory_node {
+	u64 node;
+	u64 size;
+	unsigned long *set;
+};
+
 struct perf_env {
 	char *hostname;
 	char *os_release;
@@ -43,6 +49,7 @@ struct perf_env {
 	int nr_sibling_cores;
 	int nr_sibling_threads;
 	int nr_numa_nodes;
+	int nr_memory_nodes;
 	int nr_pmu_mappings;
 	int nr_groups;
 	char *cmdline;
@@ -54,6 +61,8 @@ struct perf_env {
 	struct cpu_cache_level *caches;
 	int caches_cnt;
 	struct numa_node *numa_nodes;
+	struct memory_node *memory_nodes;
+	unsigned long long memory_bsize;
 };
 
 extern struct perf_env perf_env;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 41a4666f1519..a59281d64368 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -722,7 +722,8 @@ void perf_evlist__munmap(struct perf_evlist *evlist)
 	zfree(&evlist->overwrite_mmap);
 }
 
-static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
+static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist,
+						 bool overwrite)
 {
 	int i;
 	struct perf_mmap *map;
@@ -736,6 +737,7 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		map[i].fd = -1;
+		map[i].overwrite = overwrite;
 		/*
 		 * When the perf_mmap() call is made we grab one refcount, plus
 		 * one extra to let perf_mmap__consume() get the last
@@ -779,7 +781,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
 		maps = evlist->overwrite_mmap;
 
 	if (!maps) {
-		maps = perf_evlist__alloc_mmap(evlist);
+		maps = perf_evlist__alloc_mmap(evlist, true);
 		if (!maps)
 			return -1;
 		evlist->overwrite_mmap = maps;
@@ -1029,7 +1031,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 	struct mmap_params mp;
 
 	if (!evlist->mmap)
-		evlist->mmap = perf_evlist__alloc_mmap(evlist);
+		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
 	if (!evlist->mmap)
 		return -ENOMEM;
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index b56e1c2ddaee..1ac8d9236efd 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -244,6 +244,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
 	evsel->metric_name = NULL;
 	evsel->metric_events = NULL;
 	evsel->collect_stat = false;
+	evsel->pmu_name = NULL;
 }
 
 struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
@@ -621,22 +622,34 @@ const char *perf_evsel__group_name(struct perf_evsel *evsel)
 	return evsel->group_name ?: "anon group";
 }
 
+/*
+ * Returns the group details for the specified leader,
+ * with following rules.
+ *
+ *  For record -e '{cycles,instructions}'
+ *    'anon group { cycles:u, instructions:u }'
+ *
+ *  For record -e 'cycles,instructions' and report --group
+ *    'cycles:u, instructions:u'
+ */
 int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
 {
-	int ret;
+	int ret = 0;
 	struct perf_evsel *pos;
 	const char *group_name = perf_evsel__group_name(evsel);
 
-	ret = scnprintf(buf, size, "%s", group_name);
+	if (!evsel->forced_leader)
+		ret = scnprintf(buf, size, "%s { ", group_name);
 
-	ret += scnprintf(buf + ret, size - ret, " { %s",
+	ret += scnprintf(buf + ret, size - ret, "%s",
 			 perf_evsel__name(evsel));
 
 	for_each_group_member(pos, evsel)
 		ret += scnprintf(buf + ret, size - ret, ", %s",
 				 perf_evsel__name(pos));
 
-	ret += scnprintf(buf + ret, size - ret, " }");
+	if (!evsel->forced_leader)
+		ret += scnprintf(buf + ret, size - ret, " }");
 
 	return ret;
 }
@@ -1233,7 +1246,7 @@ void perf_evsel__exit(struct perf_evsel *evsel)
 	perf_evsel__free_fd(evsel);
 	perf_evsel__free_id(evsel);
 	perf_evsel__free_config_terms(evsel);
-	close_cgroup(evsel->cgrp);
+	cgroup__put(evsel->cgrp);
 	cpu_map__put(evsel->cpus);
 	cpu_map__put(evsel->own_cpus);
 	thread_map__put(evsel->threads);
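The new comment block documents two output shapes, keyed off the forced_leader flag. Concretely, for a hypothetical group-leader evsel (strings taken from the comment above, not captured output):

	/*
	 * perf_evsel__group_desc() now produces either of:
	 *
	 *   forced_leader == false (a real group, e.g. -e '{cycles,instructions}'):
	 *       "anon group { cycles:u, instructions:u }"
	 *
	 *   forced_leader == true (a group forced by 'report --group'):
	 *       "cycles:u, instructions:u"
	 *
	 * 'leader' below is an assumed group-leader evsel, for illustration.
	 */
	char buf[128];

	perf_evsel__group_desc(leader, buf, sizeof(buf));
	printf("%s\n", buf);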
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a7487c6d1866..d3ee3af618ef 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -30,7 +30,7 @@ struct perf_sample_id {
 	u64 period;
 };
 
-struct cgroup_sel;
+struct cgroup;
 
 /*
  * The 'struct perf_evsel_config_term' is used to pass event
@@ -107,7 +107,7 @@ struct perf_evsel {
 	struct perf_stat_evsel *stats;
 	void *priv;
 	u64 db_id;
-	struct cgroup_sel *cgrp;
+	struct cgroup *cgrp;
 	void *handler;
 	struct cpu_map *cpus;
 	struct cpu_map *own_cpus;
@@ -125,6 +125,7 @@ struct perf_evsel {
 	bool per_pkg;
 	bool precise_max;
 	bool ignore_missing_thread;
+	bool forced_leader;
 	/* parse modifier helper */
 	int exclude_GH;
 	int nr_members;
@@ -142,6 +143,7 @@ struct perf_evsel {
 	struct perf_evsel **metric_events;
 	bool collect_stat;
 	bool weak_group;
+	const char *pmu_name;
 };
 
 union u64_swap {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a326e0d8b5b6..e14b3f7c7212 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -17,6 +17,7 @@
 #include <sys/stat.h>
 #include <sys/utsname.h>
 #include <linux/time64.h>
+#include <dirent.h>
 
 #include "evlist.h"
 #include "evsel.h"
@@ -37,6 +38,7 @@
 #include "asm/bug.h"
 #include "tool.h"
 #include "time-utils.h"
+#include "units.h"
 
 #include "sane_ctype.h"
 
@@ -132,6 +134,25 @@ int do_write(struct feat_fd *ff, const void *buf, size_t size)
 }
 
 /* Return: 0 if succeded, -ERR if failed. */
+static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
+{
+	u64 *p = (u64 *) set;
+	int i, ret;
+
+	ret = do_write(ff, &size, sizeof(size));
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
+		ret = do_write(ff, p + i, sizeof(*p));
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Return: 0 if succeded, -ERR if failed. */
 int write_padded(struct feat_fd *ff, const void *bf,
 		 size_t count, size_t count_aligned)
 {
@@ -243,6 +264,38 @@ static char *do_read_string(struct feat_fd *ff)
 	return NULL;
 }
 
+/* Return: 0 if succeded, -ERR if failed. */
+static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
+{
+	unsigned long *set;
+	u64 size, *p;
+	int i, ret;
+
+	ret = do_read_u64(ff, &size);
+	if (ret)
+		return ret;
+
+	set = bitmap_alloc(size);
+	if (!set)
+		return -ENOMEM;
+
+	bitmap_zero(set, size);
+
+	p = (u64 *) set;
+
+	for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
+		ret = do_read_u64(ff, p + i);
+		if (ret < 0) {
+			free(set);
+			return ret;
+		}
+	}
+
+	*pset  = set;
+	*psize = size;
+	return 0;
+}
+
 static int write_tracing_data(struct feat_fd *ff,
 			      struct perf_evlist *evlist)
 {
@@ -1196,6 +1249,176 @@ static int write_sample_time(struct feat_fd *ff,
 		       sizeof(evlist->last_sample_time));
 }
 
+
+static int memory_node__read(struct memory_node *n, unsigned long idx)
+{
+	unsigned int phys, size = 0;
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+
+#define for_each_memory(mem, dir)					\
+	while ((ent = readdir(dir)))					\
+		if (strcmp(ent->d_name, ".") &&				\
+		    strcmp(ent->d_name, "..") &&			\
+		    sscanf(ent->d_name, "memory%u", &mem) == 1)
+
+	scnprintf(path, PATH_MAX,
+		  "%s/devices/system/node/node%lu",
+		  sysfs__mountpoint(), idx);
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_warning("failed: cant' open memory sysfs data\n");
+		return -1;
+	}
+
+	for_each_memory(phys, dir) {
+		size = max(phys, size);
+	}
+
+	size++;
+
+	n->set = bitmap_alloc(size);
+	if (!n->set) {
+		closedir(dir);
+		return -ENOMEM;
+	}
+
+	bitmap_zero(n->set, size);
+	n->node = idx;
+	n->size = size;
+
+	rewinddir(dir);
+
+	for_each_memory(phys, dir) {
+		set_bit(phys, n->set);
+	}
+
+	closedir(dir);
+	return 0;
+}
+
+static int memory_node__sort(const void *a, const void *b)
+{
+	const struct memory_node *na = a;
+	const struct memory_node *nb = b;
+
+	return na->node - nb->node;
+}
+
+static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
+{
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+	u64 cnt = 0;
+	int ret = 0;
+
+	scnprintf(path, PATH_MAX, "%s/devices/system/node/",
+		  sysfs__mountpoint());
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_warning("failed: can't open node sysfs data\n");
+		return -1;
+	}
+
+	while (!ret && (ent = readdir(dir))) {
+		unsigned int idx;
+		int r;
+
+		if (!strcmp(ent->d_name, ".") ||
+		    !strcmp(ent->d_name, ".."))
+			continue;
+
+		r = sscanf(ent->d_name, "node%u", &idx);
+		if (r != 1)
+			continue;
+
+		if (WARN_ONCE(cnt >= size,
+			"failed to write MEM_TOPOLOGY, way too many nodes\n"))
+			return -1;
+
+		ret = memory_node__read(&nodes[cnt++], idx);
+	}
+
+	*cntp = cnt;
+	closedir(dir);
+
+	if (!ret)
+		qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort);
+
+	return ret;
+}
+
+#define MAX_MEMORY_NODES 2000
+
+/*
+ * The MEM_TOPOLOGY holds physical memory map for every
+ * node in system. The format of data is as follows:
+ *
+ *  0 - version          | for future changes
+ *  8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
+ * 16 - count            | number of nodes
+ *
+ * For each node we store map of physical indexes for
+ * each node:
+ *
+ * 32 - node id          | node index
+ * 40 - size             | size of bitmap
+ * 48 - bitmap           | bitmap of memory indexes that belongs to node
+ */
+static int write_mem_topology(struct feat_fd *ff __maybe_unused,
+			      struct perf_evlist *evlist __maybe_unused)
+{
+	static struct memory_node nodes[MAX_MEMORY_NODES];
+	u64 bsize, version = 1, i, nr;
+	int ret;
+
+	ret = sysfs__read_xll("devices/system/memory/block_size_bytes",
+			      (unsigned long long *) &bsize);
+	if (ret)
+		return ret;
+
+	ret = build_mem_topology(&nodes[0], MAX_MEMORY_NODES, &nr);
+	if (ret)
+		return ret;
+
+	ret = do_write(ff, &version, sizeof(version));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &bsize, sizeof(bsize));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &nr, sizeof(nr));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node *n = &nodes[i];
+
+		#define _W(v)						\
+			ret = do_write(ff, &n->v, sizeof(n->v));	\
+			if (ret < 0)					\
+				goto out;
+
+		_W(node)
+		_W(size)
+
+		#undef _W
+
+		ret = do_write_bitmap(ff, n->set, n->size);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
 static void print_hostname(struct feat_fd *ff, FILE *fp)
 {
 	fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1543,6 +1766,35 @@ static void print_sample_time(struct feat_fd *ff, FILE *fp)
 	fprintf(fp, "# sample duration : %10.3f ms\n", d);
 }
 
+static void memory_node__fprintf(struct memory_node *n,
+				 unsigned long long bsize, FILE *fp)
+{
+	char buf_map[100], buf_size[50];
+	unsigned long long size;
+
+	size = bsize * bitmap_weight(n->set, n->size);
+	unit_number__scnprintf(buf_size, 50, size);
+
+	bitmap_scnprintf(n->set, n->size, buf_map, 100);
+	fprintf(fp, "# %3" PRIu64 " [%s]: %s\n", n->node, buf_size, buf_map);
+}
+
+static void print_mem_topology(struct feat_fd *ff, FILE *fp)
+{
+	struct memory_node *nodes;
+	int i, nr;
+
+	nodes = ff->ph->env.memory_nodes;
+	nr    = ff->ph->env.nr_memory_nodes;
+
+	fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n",
+		nr, ff->ph->env.memory_bsize);
+
+	for (i = 0; i < nr; i++) {
+		memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp);
+	}
+}
+
 static int __event_process_build_id(struct build_id_event *bev,
 				    char *filename,
 				    struct perf_session *session)
@@ -2205,6 +2457,58 @@ static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
 	return 0;
 }
 
+static int process_mem_topology(struct feat_fd *ff,
+				void *data __maybe_unused)
+{
+	struct memory_node *nodes;
+	u64 version, i, nr, bsize;
+	int ret = -1;
+
+	if (do_read_u64(ff, &version))
+		return -1;
+
+	if (version != 1)
+		return -1;
+
+	if (do_read_u64(ff, &bsize))
+		return -1;
+
+	if (do_read_u64(ff, &nr))
+		return -1;
+
+	nodes = zalloc(sizeof(*nodes) * nr);
+	if (!nodes)
+		return -1;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node n;
+
+		#define _R(v)				\
+			if (do_read_u64(ff, &n.v))	\
+				goto out;		\
+
+		_R(node)
+		_R(size)
+
+		#undef _R
+
+		if (do_read_bitmap(ff, &n.set, &n.size))
+			goto out;
+
+		nodes[i] = n;
+	}
+
+	ff->ph->env.memory_bsize    = bsize;
+	ff->ph->env.memory_nodes    = nodes;
+	ff->ph->env.nr_memory_nodes = nr;
+	ret = 0;
+
+out:
+	if (ret)
+		free(nodes);
+	return ret;
+}
+
 struct feature_ops {
 	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
 	void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2263,6 +2567,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPN(STAT,		stat,		false),
 	FEAT_OPN(CACHE,		cache,		true),
 	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
+	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
 };
 
 struct header_print_data {
@@ -2318,7 +2623,12 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
 	if (ret == -1)
 		return -1;
 
-	fprintf(fp, "# captured on: %s", ctime(&st.st_ctime));
+	fprintf(fp, "# captured on    : %s", ctime(&st.st_ctime));
+
+	fprintf(fp, "# header version : %u\n", header->version);
+	fprintf(fp, "# data offset    : %" PRIu64 "\n", header->data_offset);
+	fprintf(fp, "# data size      : %" PRIu64 "\n", header->data_size);
+	fprintf(fp, "# feat offset    : %" PRIu64 "\n", header->feat_offset);
 
 	perf_header__process_sections(header, fd, &hd,
 				      perf_file_section__fprintf_info);
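For concreteness, here is a worked example of the byte stream write_mem_topology() above would emit for a hypothetical single-node machine, derived only from the code shown (note that do_write_bitmap() writes its own length word first, so the bitmap size appears twice on disk):

	/*
	 * Hypothetical MEM_TOPOLOGY section for one node holding memory
	 * sections memory0..memory3 with a 128MB block size; every field
	 * is a u64:
	 *
	 *   version          = 1
	 *   block_size_bytes = 0x8000000   // 128MB
	 *   count            = 1           // one memory node
	 *   node             = 0           // node0
	 *   size             = 4           // bits in node0's bitmap
	 *   size             = 4           // re-emitted by do_write_bitmap()
	 *   bitmap word      = 0xf         // sections 0-3 present
	 *
	 * i.e. node0 spans 4 * 128MB = 512MB of physical memory, which is
	 * what 'perf report --header' prints via print_mem_topology().
	 */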
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 942bdec6d70d..90d4577a92dc 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -36,6 +36,7 @@ enum {
 	HEADER_STAT,
 	HEADER_CACHE,
 	HEADER_SAMPLE_TIME,
+	HEADER_MEM_TOPOLOGY,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 44a8456cea10..7d968892ee39 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -536,7 +536,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
 			 * This mem info was allocated from sample__resolve_mem
 			 * and will not be used anymore.
 			 */
-			zfree(&entry->mem_info);
+			mem_info__zput(entry->mem_info);
 
 			/* If the map of an existing hist_entry has
 			 * become out-of-date due to an exec() or
@@ -1139,7 +1139,7 @@ void hist_entry__delete(struct hist_entry *he)
 	if (he->mem_info) {
 		map__zput(he->mem_info->iaddr.map);
 		map__zput(he->mem_info->daddr.map);
-		zfree(&he->mem_info);
+		mem_info__zput(he->mem_info);
 	}
 
 	zfree(&he->stat_acc);
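mem_info__zput() replaces the bare zfree() here because a mem_info can now be shared between several users, per the refcounting change in this merge. A sketch of the intended discipline, assuming the mem_info__new()/mem_info__get() helpers this series introduces elsewhere (not shown in this excerpt):

	/* Each holder drops only its own reference; the last put frees. */
	struct mem_info *mi = mem_info__new();		/* refcount = 1 */
	struct mem_info *shared = mem_info__get(mi);	/* refcount = 2 */

	mem_info__zput(shared);	/* put + NULL the pointer; refcount = 1 */
	mem_info__zput(mi);	/* last reference dropped, mem_info freed */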
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index aa1593ce551d..f9157aed1289 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1378,6 +1378,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
1378 intel_pt_clear_tx_flags(decoder); 1378 intel_pt_clear_tx_flags(decoder);
1379 decoder->have_tma = false; 1379 decoder->have_tma = false;
1380 decoder->cbr = 0; 1380 decoder->cbr = 0;
1381 decoder->timestamp_insn_cnt = 0;
1381 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; 1382 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1382 decoder->overflow = true; 1383 decoder->overflow = true;
1383 return -EOVERFLOW; 1384 return -EOVERFLOW;
@@ -1616,6 +1617,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
1616 case INTEL_PT_PWRX: 1617 case INTEL_PT_PWRX:
1617 intel_pt_log("ERROR: Missing TIP after FUP\n"); 1618 intel_pt_log("ERROR: Missing TIP after FUP\n");
1618 decoder->pkt_state = INTEL_PT_STATE_ERR3; 1619 decoder->pkt_state = INTEL_PT_STATE_ERR3;
1620 decoder->pkt_step = 0;
1619 return -ENOENT; 1621 return -ENOENT;
1620 1622
1621 case INTEL_PT_OVF: 1623 case INTEL_PT_OVF:
@@ -2390,14 +2392,6 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2390 return &decoder->state; 2392 return &decoder->state;
2391} 2393}
2392 2394
2393static bool intel_pt_at_psb(unsigned char *buf, size_t len)
2394{
2395 if (len < INTEL_PT_PSB_LEN)
2396 return false;
2397 return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR,
2398 INTEL_PT_PSB_LEN);
2399}
2400
2401/** 2395/**
2402 * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet. 2396 * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
2403 * @buf: pointer to buffer pointer 2397 * @buf: pointer to buffer pointer
@@ -2486,6 +2480,7 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
2486 * @buf: buffer 2480 * @buf: buffer
2487 * @len: size of buffer 2481 * @len: size of buffer
2488 * @tsc: TSC value returned 2482 * @tsc: TSC value returned
2483 * @rem: returns remaining size when TSC is found
2489 * 2484 *
2490 * Find a TSC packet in @buf and return the TSC value. This function assumes 2485 * Find a TSC packet in @buf and return the TSC value. This function assumes
2491 * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a 2486 * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
@@ -2493,7 +2488,8 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
2493 * 2488 *
2494 * Return: %true if TSC is found, false otherwise. 2489 * Return: %true if TSC is found, false otherwise.
2495 */ 2490 */
2496static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) 2491static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
2492 size_t *rem)
2497{ 2493{
2498 struct intel_pt_pkt packet; 2494 struct intel_pt_pkt packet;
2499 int ret; 2495 int ret;
@@ -2504,6 +2500,7 @@ static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
2504 return false; 2500 return false;
2505 if (packet.type == INTEL_PT_TSC) { 2501 if (packet.type == INTEL_PT_TSC) {
2506 *tsc = packet.payload; 2502 *tsc = packet.payload;
2503 *rem = len;
2507 return true; 2504 return true;
2508 } 2505 }
2509 if (packet.type == INTEL_PT_PSBEND) 2506 if (packet.type == INTEL_PT_PSBEND)
@@ -2554,6 +2551,8 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
2554 * @len_a: size of first buffer 2551 * @len_a: size of first buffer
2555 * @buf_b: second buffer 2552 * @buf_b: second buffer
2556 * @len_b: size of second buffer 2553 * @len_b: size of second buffer
2554 * @consecutive: returns true if there is data in buf_b that is consecutive
2555 * to buf_a
2557 * 2556 *
2558 * If the trace contains TSC we can look at the last TSC of @buf_a and the 2557 * If the trace contains TSC we can look at the last TSC of @buf_a and the
2559 * first TSC of @buf_b in order to determine if the buffers overlap, and then 2558 * first TSC of @buf_b in order to determine if the buffers overlap, and then
@@ -2566,33 +2565,41 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
2566static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, 2565static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
2567 size_t len_a, 2566 size_t len_a,
2568 unsigned char *buf_b, 2567 unsigned char *buf_b,
2569 size_t len_b) 2568 size_t len_b, bool *consecutive)
2570{ 2569{
2571 uint64_t tsc_a, tsc_b; 2570 uint64_t tsc_a, tsc_b;
2572 unsigned char *p; 2571 unsigned char *p;
2573 size_t len; 2572 size_t len, rem_a, rem_b;
2574 2573
2575 p = intel_pt_last_psb(buf_a, len_a); 2574 p = intel_pt_last_psb(buf_a, len_a);
2576 if (!p) 2575 if (!p)
2577 return buf_b; /* No PSB in buf_a => no overlap */ 2576 return buf_b; /* No PSB in buf_a => no overlap */
2578 2577
2579 len = len_a - (p - buf_a); 2578 len = len_a - (p - buf_a);
2580 if (!intel_pt_next_tsc(p, len, &tsc_a)) { 2579 if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) {
2581 /* The last PSB+ in buf_a is incomplete, so go back one more */ 2580 /* The last PSB+ in buf_a is incomplete, so go back one more */
2582 len_a -= len; 2581 len_a -= len;
2583 p = intel_pt_last_psb(buf_a, len_a); 2582 p = intel_pt_last_psb(buf_a, len_a);
2584 if (!p) 2583 if (!p)
2585 return buf_b; /* No full PSB+ => assume no overlap */ 2584 return buf_b; /* No full PSB+ => assume no overlap */
2586 len = len_a - (p - buf_a); 2585 len = len_a - (p - buf_a);
2587 if (!intel_pt_next_tsc(p, len, &tsc_a)) 2586 if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a))
2588 return buf_b; /* No TSC in buf_a => assume no overlap */ 2587 return buf_b; /* No TSC in buf_a => assume no overlap */
2589 } 2588 }
2590 2589
2591 while (1) { 2590 while (1) {
2592 /* Ignore PSB+ with no TSC */ 2591 /* Ignore PSB+ with no TSC */
2593 if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) && 2592 if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) {
2594 intel_pt_tsc_cmp(tsc_a, tsc_b) < 0) 2593 int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b);
2595 return buf_b; /* tsc_a < tsc_b => no overlap */ 2594
2595 /* Same TSC, so buffers are consecutive */
2596 if (!cmp && rem_b >= rem_a) {
2597 *consecutive = true;
2598 return buf_b + len_b - (rem_b - rem_a);
2599 }
2600 if (cmp < 0)
2601 return buf_b; /* tsc_a < tsc_b => no overlap */
2602 }
2596 2603
2597 if (!intel_pt_step_psb(&buf_b, &len_b)) 2604 if (!intel_pt_step_psb(&buf_b, &len_b))
2598 return buf_b + len_b; /* No PSB in buf_b => no data */ 2605 return buf_b + len_b; /* No PSB in buf_b => no data */
@@ -2606,6 +2613,8 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
2606 * @buf_b: second buffer 2613 * @buf_b: second buffer
2607 * @len_b: size of second buffer 2614 * @len_b: size of second buffer
2608 * @have_tsc: can use TSC packets to detect overlap 2615 * @have_tsc: can use TSC packets to detect overlap
2616 * @consecutive: returns true if there is data in buf_b that is consecutive
2617 * to buf_a
2609 * 2618 *
2610 * When trace samples or snapshots are recorded there is the possibility that 2619 * When trace samples or snapshots are recorded there is the possibility that
2611 * the data overlaps. Note that, for the purposes of decoding, data is only 2620 * the data overlaps. Note that, for the purposes of decoding, data is only
@@ -2616,7 +2625,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
2616 */ 2625 */
2617unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, 2626unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
2618 unsigned char *buf_b, size_t len_b, 2627 unsigned char *buf_b, size_t len_b,
2619 bool have_tsc) 2628 bool have_tsc, bool *consecutive)
2620{ 2629{
2621 unsigned char *found; 2630 unsigned char *found;
2622 2631
@@ -2628,7 +2637,8 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
2628 return buf_b; /* No overlap */ 2637 return buf_b; /* No overlap */
2629 2638
2630 if (have_tsc) { 2639 if (have_tsc) {
2631 found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b); 2640 found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
2641 consecutive);
2632 if (found) 2642 if (found)
2633 return found; 2643 return found;
2634 } 2644 }
@@ -2643,28 +2653,16 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
 	}
 
 	/* Now len_b >= len_a */
-	if (len_b > len_a) {
-		/* The leftover buffer 'b' must start at a PSB */
-		while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
-			if (!intel_pt_step_psb(&buf_a, &len_a))
-				return buf_b; /* No overlap */
-		}
-	}
-
 	while (1) {
 		/* Potential overlap so check the bytes */
 		found = memmem(buf_a, len_a, buf_b, len_a);
-		if (found)
+		if (found) {
+			*consecutive = true;
 			return buf_b + len_a;
+		}
 
 		/* Try again at next PSB in buffer 'a' */
 		if (!intel_pt_step_psb(&buf_a, &len_a))
 			return buf_b; /* No overlap */
-
-		/* The leftover buffer 'b' must start at a PSB */
-		while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
-			if (!intel_pt_step_psb(&buf_a, &len_a))
-				return buf_b; /* No overlap */
-		}
 	}
 }
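For orientation, this is how the new out-parameter is consumed on the caller side; a condensed sketch of intel_pt_do_fix_overlap() from the intel-pt.c hunks further down, not standalone code:

	bool consecutive = false;
	void *start;

	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
				      pt->have_tsc, &consecutive);
	if (!start)
		return -EINVAL;
	/* Trim buffer 'b' to the part that does not repeat buffer 'a' */
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	/* Record when the remaining data directly continues buffer 'a' */
	if (b->use_size && consecutive)
		b->consecutive = true;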
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 921b22e8ca0e..fc1752d50019 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -117,7 +117,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
 
 unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
 				     unsigned char *buf_b, size_t len_b,
-				     bool have_tsc);
+				     bool have_tsc, bool *consecutive);
 
 int intel_pt__strerror(int code, char *buf, size_t buflen);
 
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 3773d9c54f45..0effaff57020 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -132,6 +132,7 @@ struct intel_pt_queue {
 	struct intel_pt *pt;
 	unsigned int queue_nr;
 	struct auxtrace_buffer *buffer;
+	struct auxtrace_buffer *old_buffer;
 	void *decoder;
 	const struct intel_pt_state *state;
 	struct ip_callchain *chain;
@@ -143,6 +144,7 @@ struct intel_pt_queue {
 	bool stop;
 	bool step_through_buffers;
 	bool use_buffer_pid_tid;
+	bool sync_switch;
 	pid_t pid, tid;
 	int cpu;
 	int switch_state;
@@ -207,49 +209,28 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
 				   struct auxtrace_buffer *b)
 {
+	bool consecutive = false;
 	void *start;
 
 	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
-				      pt->have_tsc);
+				      pt->have_tsc, &consecutive);
 	if (!start)
 		return -EINVAL;
 	b->use_size = b->data + b->size - start;
 	b->use_data = start;
+	if (b->use_size && consecutive)
+		b->consecutive = true;
 	return 0;
 }
 
-static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
-					struct auxtrace_queue *queue,
-					struct auxtrace_buffer *buffer)
-{
-	if (queue->cpu == -1 && buffer->cpu != -1)
-		ptq->cpu = buffer->cpu;
-
-	ptq->pid = buffer->pid;
-	ptq->tid = buffer->tid;
-
-	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
-		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
-
-	thread__zput(ptq->thread);
-
-	if (ptq->tid != -1) {
-		if (ptq->pid != -1)
-			ptq->thread = machine__findnew_thread(ptq->pt->machine,
-							      ptq->pid,
-							      ptq->tid);
-		else
-			ptq->thread = machine__find_thread(ptq->pt->machine, -1,
-							   ptq->tid);
-	}
-}
-
 /* This function assumes data is processed sequentially only */
 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
 {
 	struct intel_pt_queue *ptq = data;
-	struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
+	struct auxtrace_buffer *buffer = ptq->buffer;
+	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
 	struct auxtrace_queue *queue;
+	bool might_overlap;
 
 	if (ptq->stop) {
 		b->len = 0;
@@ -257,7 +238,7 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
 	}
 
 	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
-next:
+
 	buffer = auxtrace_buffer__next(queue, buffer);
 	if (!buffer) {
 		if (old_buffer)
@@ -276,7 +257,8 @@ next:
 		return -ENOMEM;
 	}
 
-	if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
+	might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
+	if (might_overlap && !buffer->consecutive && old_buffer &&
 	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
 		return -ENOMEM;
 
@@ -289,33 +271,24 @@ next:
 	}
 	b->ref_timestamp = buffer->reference;
 
-	/*
-	 * If in snapshot mode and the buffer has no usable data, get next
-	 * buffer and again check overlap against old_buffer.
-	 */
-	if (ptq->pt->snapshot_mode && !b->len)
-		goto next;
-
-	if (old_buffer)
-		auxtrace_buffer__drop_data(old_buffer);
-
-	if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
-						      !buffer->consecutive)) {
+	if (!old_buffer || (might_overlap && !buffer->consecutive)) {
 		b->consecutive = false;
 		b->trace_nr = buffer->buffer_nr + 1;
 	} else {
 		b->consecutive = true;
 	}
 
-	if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
-					ptq->tid != buffer->tid))
-		intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
-
 	if (ptq->step_through_buffers)
 		ptq->stop = true;
 
-	if (!b->len)
+	if (b->len) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		ptq->old_buffer = buffer;
+	} else {
+		auxtrace_buffer__drop_data(buffer);
 		return intel_pt_get_trace(b, data);
+	}
 
 	return 0;
 }
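The net effect of the reworked tail of intel_pt_get_trace(), restated with comments (same logic as the hunk above): a buffer is only dropped once a non-empty successor has been fetched, so the next call can still run the overlap check against it.

	if (b->len) {
		/* Data returned: the previous buffer is no longer needed */
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		/* Keep this one; the next call checks overlap against it */
		ptq->old_buffer = buffer;
	} else {
		/* Empty buffer: nothing to compare against later, drop it */
		auxtrace_buffer__drop_data(buffer);
		return intel_pt_get_trace(b, data);
	}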
@@ -954,16 +927,15 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
 		ptq->cpu = queue->cpu;
 		ptq->tid = queue->tid;
 
-		if (pt->sampling_mode) {
-			if (pt->timeless_decoding)
-				ptq->step_through_buffers = true;
-			if (pt->timeless_decoding || !pt->have_sched_switch)
-				ptq->use_buffer_pid_tid = true;
-		}
+		if (pt->sampling_mode && !pt->snapshot_mode &&
+		    pt->timeless_decoding)
+			ptq->step_through_buffers = true;
+
+		ptq->sync_switch = pt->sync_switch;
 	}
 
 	if (!ptq->on_heap &&
-	    (!pt->sync_switch ||
+	    (!ptq->sync_switch ||
 	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
 		const struct intel_pt_state *state;
 		int ret;
@@ -1546,7 +1518,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 	if (pt->synth_opts.last_branch)
 		intel_pt_update_last_branch_rb(ptq);
 
-	if (!pt->sync_switch)
+	if (!ptq->sync_switch)
 		return 0;
 
 	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
@@ -1627,6 +1599,21 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
 	return switch_ip;
 }
 
+static void intel_pt_enable_sync_switch(struct intel_pt *pt)
+{
+	unsigned int i;
+
+	pt->sync_switch = true;
+
+	for (i = 0; i < pt->queues.nr_queues; i++) {
+		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+		struct intel_pt_queue *ptq = queue->priv;
+
+		if (ptq)
+			ptq->sync_switch = true;
+	}
+}
+
 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
 {
 	const struct intel_pt_state *state = ptq->state;
@@ -1643,7 +1630,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
 			if (pt->switch_ip) {
 				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
 					     pt->switch_ip, pt->ptss_ip);
-				pt->sync_switch = true;
+				intel_pt_enable_sync_switch(pt);
 			}
 		}
 	}
@@ -1659,9 +1646,9 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
 		if (state->err) {
 			if (state->err == INTEL_PT_ERR_NODATA)
 				return 1;
-			if (pt->sync_switch &&
+			if (ptq->sync_switch &&
 			    state->from_ip >= pt->kernel_start) {
-				pt->sync_switch = false;
+				ptq->sync_switch = false;
 				intel_pt_next_tid(pt, ptq);
 			}
 			if (pt->synth_opts.errors) {
@@ -1687,7 +1674,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
 					     state->timestamp, state->est_timestamp);
 				ptq->timestamp = state->est_timestamp;
 				/* Use estimated TSC in unknown switch state */
-			} else if (pt->sync_switch &&
+			} else if (ptq->sync_switch &&
 				   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
 				   intel_pt_is_switch_ip(ptq, state->to_ip) &&
 				   ptq->next_tid == -1) {
@@ -1834,7 +1821,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
 		return 1;
 
 	ptq = intel_pt_cpu_to_ptq(pt, cpu);
-	if (!ptq)
+	if (!ptq || !ptq->sync_switch)
 		return 1;
 
 	switch (ptq->switch_state) {
@@ -2075,9 +2062,6 @@ static int intel_pt_process_auxtrace_event(struct perf_session *session,
 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
 					   auxtrace);
 
-	if (pt->sampling_mode)
-		return 0;
-
 	if (!pt->data_queued) {
 		struct auxtrace_buffer *buffer;
 		off_t data_offset;
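sync_switch is now tracked per queue as well as globally: enabling it fans out to every allocated queue, while a decode error in kernel space disables it for the affected queue only, which then falls back to intel_pt_next_tid(). Both paths appear verbatim in the hunks above; side by side:

	/* enable: global flag plus every allocated queue */
	intel_pt_enable_sync_switch(pt);

	/* disable on a kernel-space decode error: this queue only */
	if (ptq->sync_switch && state->from_ip >= pt->kernel_start) {
		ptq->sync_switch = false;
		intel_pt_next_tid(pt, ptq);
	}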
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 12b7427444a3..43fbbee409ec 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1697,7 +1697,7 @@ static void ip__resolve_data(struct thread *thread,
 struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 				     struct addr_location *al)
 {
-	struct mem_info *mi = zalloc(sizeof(*mi));
+	struct mem_info *mi = mem_info__new();
 
 	if (!mi)
 		return NULL;
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 4f27c464ce0b..074c4fd3b67e 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -75,9 +75,7 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
  * }
  * perf_mmap__read_done()
  */
-union perf_event *perf_mmap__read_event(struct perf_mmap *map,
-					bool overwrite,
-					u64 *startp, u64 end)
+union perf_event *perf_mmap__read_event(struct perf_mmap *map)
 {
 	union perf_event *event;
 
@@ -87,17 +85,14 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map,
 	if (!refcount_read(&map->refcnt))
 		return NULL;
 
-	if (startp == NULL)
-		return NULL;
-
 	/* non-overwrite doesn't pause the ringbuffer */
-	if (!overwrite)
-		end = perf_mmap__read_head(map);
+	if (!map->overwrite)
+		map->end = perf_mmap__read_head(map);
 
-	event = perf_mmap__read(map, startp, end);
+	event = perf_mmap__read(map, &map->start, map->end);
 
-	if (!overwrite)
-		map->prev = *startp;
+	if (!map->overwrite)
+		map->prev = map->start;
 
 	return event;
 }
@@ -120,9 +115,9 @@ void perf_mmap__put(struct perf_mmap *map)
 		perf_mmap__munmap(map);
 }
 
-void perf_mmap__consume(struct perf_mmap *map, bool overwrite)
+void perf_mmap__consume(struct perf_mmap *map)
 {
-	if (!overwrite) {
+	if (!map->overwrite) {
 		u64 old = map->prev;
 
 		perf_mmap__write_tail(map, old);
@@ -240,27 +235,26 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u6
 /*
  * Report the start and end of the available data in ringbuffer
  */
-int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
-			 u64 *startp, u64 *endp)
+int perf_mmap__read_init(struct perf_mmap *md)
 {
 	u64 head = perf_mmap__read_head(md);
 	u64 old = md->prev;
 	unsigned char *data = md->base + page_size;
 	unsigned long size;
 
-	*startp = overwrite ? head : old;
-	*endp = overwrite ? old : head;
+	md->start = md->overwrite ? head : old;
+	md->end = md->overwrite ? old : head;
 
-	if (*startp == *endp)
+	if (md->start == md->end)
 		return -EAGAIN;
 
-	size = *endp - *startp;
+	size = md->end - md->start;
 	if (size > (unsigned long)(md->mask) + 1) {
-		if (!overwrite) {
+		if (!md->overwrite) {
 			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
 
 			md->prev = head;
-			perf_mmap__consume(md, overwrite);
+			perf_mmap__consume(md);
 			return -EAGAIN;
 		}
 
@@ -268,33 +262,32 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
 		 * Backward ring buffer is full. We still have a chance to read
 		 * most of data from it.
 		 */
-		if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
+		if (overwrite_rb_find_range(data, md->mask, head, &md->start, &md->end))
 			return -EINVAL;
 	}
 
 	return 0;
 }
 
-int perf_mmap__push(struct perf_mmap *md, bool overwrite,
-		    void *to, int push(void *to, void *buf, size_t size))
+int perf_mmap__push(struct perf_mmap *md, void *to,
+		    int push(void *to, void *buf, size_t size))
 {
 	u64 head = perf_mmap__read_head(md);
-	u64 end, start;
 	unsigned char *data = md->base + page_size;
 	unsigned long size;
 	void *buf;
 	int rc = 0;
 
-	rc = perf_mmap__read_init(md, overwrite, &start, &end);
+	rc = perf_mmap__read_init(md);
 	if (rc < 0)
 		return (rc == -EAGAIN) ? 0 : -1;
 
-	size = end - start;
+	size = md->end - md->start;
 
-	if ((start & md->mask) + size != (end & md->mask)) {
-		buf = &data[start & md->mask];
-		size = md->mask + 1 - (start & md->mask);
-		start += size;
+	if ((md->start & md->mask) + size != (md->end & md->mask)) {
+		buf = &data[md->start & md->mask];
+		size = md->mask + 1 - (md->start & md->mask);
+		md->start += size;
 
 		if (push(to, buf, size) < 0) {
 			rc = -1;
@@ -302,9 +295,9 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 		}
 	}
 
-	buf = &data[start & md->mask];
-	size = end - start;
-	start += size;
+	buf = &data[md->start & md->mask];
+	size = md->end - md->start;
+	md->start += size;
 
 	if (push(to, buf, size) < 0) {
 		rc = -1;
@@ -312,7 +305,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 	}
 
 	md->prev = head;
-	perf_mmap__consume(md, overwrite);
+	perf_mmap__consume(md);
 out:
 	return rc;
 }
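With the bookkeeping folded into struct perf_mmap, a consumer no longer threads start/end/overwrite through every call. A minimal read-loop sketch against the reworked API, following the usage comment shown at the top of perf_mmap__read_event():

	union perf_event *event;

	if (perf_mmap__read_init(map) < 0)
		return;	/* -EAGAIN: nothing new in the ring buffer */

	while ((event = perf_mmap__read_event(map)) != NULL) {
		/* ... process the event ... */
		perf_mmap__consume(map);	/* advances the tail unless map->overwrite */
	}

	perf_mmap__read_done(map);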
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index ec7d3a24e276..d82294db1295 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -20,6 +20,9 @@ struct perf_mmap {
 	int fd;
 	refcount_t refcnt;
 	u64 prev;
+	u64 start;
+	u64 end;
+	bool overwrite;
 	struct auxtrace_mmap auxtrace_mmap;
 	char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
 };
@@ -63,7 +66,7 @@ void perf_mmap__munmap(struct perf_mmap *map);
 void perf_mmap__get(struct perf_mmap *map);
 void perf_mmap__put(struct perf_mmap *map);
 
-void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
+void perf_mmap__consume(struct perf_mmap *map);
 
 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
@@ -86,16 +89,13 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
 
-union perf_event *perf_mmap__read_event(struct perf_mmap *map,
-					bool overwrite,
-					u64 *startp, u64 end);
+union perf_event *perf_mmap__read_event(struct perf_mmap *map);
 
-int perf_mmap__push(struct perf_mmap *md, bool backward,
-		    void *to, int push(void *to, void *buf, size_t size));
+int perf_mmap__push(struct perf_mmap *md, void *to,
+		    int push(void *to, void *buf, size_t size));
 
 size_t perf_mmap__mmap_len(struct perf_mmap *map);
 
-int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
-			 u64 *startp, u64 *endp);
+int perf_mmap__read_init(struct perf_mmap *md);
 void perf_mmap__read_done(struct perf_mmap *map);
 #endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 34589c427e52..4e80ca320399 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1217,7 +1217,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
 			 get_config_name(head_config), &config_terms);
 }
 
-static int __parse_events_add_pmu(struct parse_events_state *parse_state,
+int parse_events_add_pmu(struct parse_events_state *parse_state,
 			 struct list_head *list, char *name,
 			 struct list_head *head_config, bool auto_merge_stats)
 {
@@ -1247,7 +1247,12 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
 	if (!head_config) {
 		attr.type = pmu->type;
 		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
-		return evsel ? 0 : -ENOMEM;
+		if (evsel) {
+			evsel->pmu_name = name;
+			return 0;
+		} else {
+			return -ENOMEM;
+		}
 	}
 
 	if (perf_pmu__check_alias(pmu, head_config, &info))
@@ -1276,18 +1281,12 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
 		evsel->snapshot = info.snapshot;
 		evsel->metric_expr = info.metric_expr;
 		evsel->metric_name = info.metric_name;
+		evsel->pmu_name = name;
 	}
 
 	return evsel ? 0 : -ENOMEM;
 }
 
-int parse_events_add_pmu(struct parse_events_state *parse_state,
-			 struct list_head *list, char *name,
-			 struct list_head *head_config)
-{
-	return __parse_events_add_pmu(parse_state, list, name, head_config, false);
-}
-
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 			       char *str, struct list_head **listp)
 {
@@ -1317,8 +1316,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 			return -1;
 		list_add_tail(&term->list, head);
 
-		if (!__parse_events_add_pmu(parse_state, list,
-					    pmu->name, head, true)) {
+		if (!parse_events_add_pmu(parse_state, list,
+					  pmu->name, head, true)) {
 			pr_debug("%s -> %s/%s/\n", str,
 				 pmu->name, alias->str);
 			ok++;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 88108cd11b4c..5015cfd58277 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -167,7 +167,7 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
 				void *ptr, char *type, u64 len);
 int parse_events_add_pmu(struct parse_events_state *parse_state,
 			 struct list_head *list, char *name,
-			 struct list_head *head_config);
+			 struct list_head *head_config, bool auto_merge_stats);
 
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 			       char *str,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 655ecff636a8..a1a01b1ac8b8 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -175,7 +175,7 @@ bpf_source [^,{}]+\.c[a-zA-Z0-9._]*
 num_dec		[0-9]+
 num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
-name		[a-zA-Z_*?][a-zA-Z0-9_*?.]*
+name		[a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]*
 name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
 drv_cfg_term	[a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
 /* If you add a modifier you need to update check_modifier() */
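Admitting \[ and \] into the name pattern lets a glob character class pass the lexer and reach the PMU-matching code in parse-events.y below. A hypothetical command line, with PMU and event names invented purely for illustration:

	# one event spec fans out to uncore_cbox_0 and uncore_cbox_1
	perf stat -e 'uncore_cbox_[01]/event=0x1/' -a sleep 1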
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index e81a20ea8d7d..7afeb80cc39e 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -8,6 +8,7 @@
 
 #define YYDEBUG 1
 
+#include <fnmatch.h>
 #include <linux/compiler.h>
 #include <linux/list.h>
 #include <linux/types.h>
@@ -231,9 +232,13 @@ PE_NAME opt_event_config
 		YYABORT;
 
 	ALLOC_LIST(list);
-	if (parse_events_add_pmu(_parse_state, list, $1, $2)) {
+	if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) {
 		struct perf_pmu *pmu = NULL;
 		int ok = 0;
+		char *pattern;
+
+		if (asprintf(&pattern, "%s*", $1) < 0)
+			YYABORT;
 
 		while ((pmu = perf_pmu__scan(pmu)) != NULL) {
 			char *name = pmu->name;
@@ -241,14 +246,19 @@ PE_NAME opt_event_config
 			if (!strncmp(name, "uncore_", 7) &&
 			    strncmp($1, "uncore_", 7))
 				name += 7;
-			if (!strncmp($1, name, strlen($1))) {
-				if (parse_events_copy_term_list(orig_terms, &terms))
+			if (!fnmatch(pattern, name, 0)) {
+				if (parse_events_copy_term_list(orig_terms, &terms)) {
+					free(pattern);
 					YYABORT;
-				if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms))
+				}
+				if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true))
 					ok++;
 				parse_events_terms__delete(terms);
 			}
 		}
+
+		free(pattern);
+
 		if (!ok)
 			YYABORT;
 	}
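When the literal PMU name does not parse, the action now builds the pattern "<spec>*" once and tries every PMU against it with fnmatch(3), so both prefixes and the bracketed classes admitted by the lexer work. A self-contained sketch of that matching rule, with illustrative names:

	#include <fnmatch.h>
	#include <stdio.h>

	int main(void)
	{
		const char *pmus[] = { "uncore_cbox_0", "uncore_cbox_1", "cpu" };
		const char *pattern = "uncore_cbox*";	/* built as "<spec>*" */
		int i;

		for (i = 0; i < 3; i++) {
			/* fnmatch() returns 0 on a match, as in the action above */
			if (!fnmatch(pattern, pmus[i], 0))
				printf("%s matches\n", pmus[i]);
		}
		return 0;
	}

A class such as "uncore_cbox_[01]" works the same way, since fnmatch() interprets brackets as a glob character class.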
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 35fb5ef7d290..b956868fd445 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -984,7 +984,6 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
 	int sample_id_all = 1, cpu;
 	static char *kwlist[] = { "cpu", "sample_id_all", NULL };
 	struct perf_mmap *md;
-	u64 end, start;
 	int err;
 
 	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
@@ -992,10 +991,10 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
 		return NULL;
 
 	md = &evlist->mmap[cpu];
-	if (perf_mmap__read_init(md, false, &start, &end) < 0)
+	if (perf_mmap__read_init(md) < 0)
 		goto end;
 
-	event = perf_mmap__read_event(md, false, &start, end);
+	event = perf_mmap__read_event(md);
 	if (event != NULL) {
 		PyObject *pyevent = pyrf_event__new(event);
 		struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
@@ -1013,7 +1012,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
 	err = perf_evsel__parse_sample(evsel, event, &pevent->sample);
 
 	/* Consume the event only after we parsed it out. */
-	perf_mmap__consume(md, false);
+	perf_mmap__consume(md);
 
 	if (err)
 		return PyErr_Format(PyExc_OSError,
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index a1a312d99f30..62b2dd2253eb 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -2221,3 +2221,25 @@ int symbol__config_symfs(const struct option *opt __maybe_unused,
 	free(bf);
 	return 0;
 }
+
+struct mem_info *mem_info__get(struct mem_info *mi)
+{
+	if (mi)
+		refcount_inc(&mi->refcnt);
+	return mi;
+}
+
+void mem_info__put(struct mem_info *mi)
+{
+	if (mi && refcount_dec_and_test(&mi->refcnt))
+		free(mi);
+}
+
+struct mem_info *mem_info__new(void)
+{
+	struct mem_info *mi = zalloc(sizeof(*mi));
+
+	if (mi)
+		refcount_set(&mi->refcnt, 1);
+	return mi;
+}
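The three helpers give struct mem_info the same get/put discipline used elsewhere in tools/perf. The resulting lifecycle, annotated (refcount values follow directly from the calls above):

	struct mem_info *mi = mem_info__new();		/* refcnt == 1 */
	struct mem_info *shared = mem_info__get(mi);	/* refcnt == 2, shared == mi */

	mem_info__put(shared);	/* refcnt == 1 */
	mem_info__put(mi);	/* refcnt == 0: struct is freed */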
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 0563f33c1eb3..70c16741f50a 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -200,9 +200,10 @@ struct branch_info {
 };
 
 struct mem_info {
 	struct addr_map_symbol iaddr;
 	struct addr_map_symbol daddr;
 	union perf_mem_data_src data_src;
+	refcount_t refcnt;
 };
 
 struct addr_location {
@@ -389,4 +390,16 @@ int sdt_notes__get_count(struct list_head *start);
 #define SDT_NOTE_NAME "stapsdt"
 #define NR_ADDR 3
 
+struct mem_info *mem_info__new(void);
+struct mem_info *mem_info__get(struct mem_info *mi);
+void mem_info__put(struct mem_info *mi);
+
+static inline void __mem_info__zput(struct mem_info **mi)
+{
+	mem_info__put(*mi);
+	*mi = NULL;
+}
+
+#define mem_info__zput(mi) __mem_info__zput(&mi)
+
 #endif /* __PERF_SYMBOL */
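mem_info__zput() follows the thread__zput() pattern seen in the intel-pt.c hunk above: drop the reference and NULL the pointer in one step, so a stale pointer cannot be put twice. A usage sketch:

	struct mem_info *mi = mem_info__new();

	if (mi) {
		/* ... share via mem_info__get(), use ... */
		mem_info__zput(mi);	/* reference dropped, mi is now NULL */
	}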
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 40cfa36c022a..14d44c3235b8 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -26,7 +26,6 @@ struct thread {
 	pid_t ppid;
 	int cpu;
 	refcount_t refcnt;
-	char shortname[3];
 	bool comm_set;
 	int comm_len;
 	bool dead; /* if set thread has exited */