aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/include/linux/poison.h5
-rw-r--r--tools/include/uapi/linux/kcmp.h27
-rw-r--r--tools/include/uapi/linux/prctl.h200
-rw-r--r--tools/perf/Documentation/perf-list.txt11
-rw-r--r--tools/perf/Documentation/perf-record.txt2
-rw-r--r--tools/perf/Documentation/perf-report.txt3
-rw-r--r--tools/perf/Documentation/perf-sched.txt8
-rw-r--r--tools/perf/Documentation/perf-script.txt11
-rw-r--r--tools/perf/Documentation/perf-stat.txt7
-rw-r--r--tools/perf/Documentation/perf-top.txt3
-rw-r--r--tools/perf/Makefile.perf39
-rw-r--r--tools/perf/arch/arm/annotate/instructions.c3
-rw-r--r--tools/perf/arch/arm64/annotate/instructions.c3
-rw-r--r--tools/perf/arch/powerpc/annotate/instructions.c4
-rw-r--r--tools/perf/arch/s390/annotate/instructions.c4
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c14
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h1
-rw-r--r--tools/perf/arch/x86/tests/Build1
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c4
-rw-r--r--tools/perf/builtin-annotate.c10
-rw-r--r--tools/perf/builtin-buildid-cache.c8
-rw-r--r--tools/perf/builtin-buildid-list.c16
-rw-r--r--tools/perf/builtin-c2c.c11
-rw-r--r--tools/perf/builtin-config.c22
-rw-r--r--tools/perf/builtin-diff.c18
-rw-r--r--tools/perf/builtin-evlist.c12
-rw-r--r--tools/perf/builtin-inject.c36
-rw-r--r--tools/perf/builtin-kmem.c11
-rw-r--r--tools/perf/builtin-kvm.c18
-rw-r--r--tools/perf/builtin-list.c7
-rw-r--r--tools/perf/builtin-lock.c12
-rw-r--r--tools/perf/builtin-mem.c13
-rw-r--r--tools/perf/builtin-record.c159
-rw-r--r--tools/perf/builtin-report.c14
-rw-r--r--tools/perf/builtin-sched.c28
-rw-r--r--tools/perf/builtin-script.c714
-rw-r--r--tools/perf/builtin-stat.c121
-rw-r--r--tools/perf/builtin-timechart.c18
-rw-r--r--tools/perf/builtin-top.c13
-rw-r--r--tools/perf/builtin-trace.c76
-rwxr-xr-xtools/perf/check-headers.sh7
-rw-r--r--tools/perf/perf.h1
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/cache.json1453
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json62
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/memory.json38
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/other.json98
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json544
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json218
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json158
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json158
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json140
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv1
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json140
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json164
-rw-r--r--tools/perf/pmu-events/jevents.c24
-rw-r--r--tools/perf/pmu-events/jevents.h2
-rw-r--r--tools/perf/pmu-events/pmu-events.h1
-rw-r--r--tools/perf/tests/attr.c2
-rw-r--r--tools/perf/tests/attr.py6
-rw-r--r--tools/perf/tests/attr/base-record2
-rw-r--r--tools/perf/tests/attr/test-record-group1
-rw-r--r--tools/perf/tests/attr/test-record-group-sampling2
-rw-r--r--tools/perf/tests/attr/test-record-group11
-rw-r--r--tools/perf/tests/attr/test-stat-C01
-rw-r--r--tools/perf/tests/attr/test-stat-basic1
-rw-r--r--tools/perf/tests/attr/test-stat-default4
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-18
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-213
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-313
-rw-r--r--tools/perf/tests/attr/test-stat-group2
-rw-r--r--tools/perf/tests/attr/test-stat-group12
-rw-r--r--tools/perf/tests/attr/test-stat-no-inherit1
-rw-r--r--tools/perf/tests/builtin-test.c1
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c2
-rw-r--r--tools/perf/tests/topology.c22
-rw-r--r--tools/perf/trace/beauty/Build2
-rw-r--r--tools/perf/trace/beauty/beauty.h18
-rw-r--r--tools/perf/trace/beauty/kcmp.c44
-rwxr-xr-xtools/perf/trace/beauty/kcmp_type.sh10
-rwxr-xr-xtools/perf/trace/beauty/madvise_behavior.sh10
-rw-r--r--tools/perf/trace/beauty/mmap.c38
-rw-r--r--tools/perf/trace/beauty/prctl.c82
-rwxr-xr-xtools/perf/trace/beauty/prctl_option.sh17
-rw-r--r--tools/perf/ui/browsers/hists.c180
-rw-r--r--tools/perf/ui/progress.c6
-rw-r--r--tools/perf/ui/progress.h12
-rw-r--r--tools/perf/ui/stdio/hist.c77
-rw-r--r--tools/perf/ui/tui/progress.c32
-rw-r--r--tools/perf/util/Build3
-rw-r--r--tools/perf/util/annotate.c10
-rw-r--r--tools/perf/util/auxtrace.c4
-rw-r--r--tools/perf/util/callchain.c179
-rw-r--r--tools/perf/util/callchain.h6
-rw-r--r--tools/perf/util/comm.c18
-rw-r--r--tools/perf/util/config.c5
-rw-r--r--tools/perf/util/data-convert-bt.c12
-rw-r--r--tools/perf/util/data.c95
-rw-r--r--tools/perf/util/data.h38
-rw-r--r--tools/perf/util/debug.c31
-rw-r--r--tools/perf/util/dso.c20
-rw-r--r--tools/perf/util/dso.h6
-rw-r--r--tools/perf/util/event.c162
-rw-r--r--tools/perf/util/event.h3
-rw-r--r--tools/perf/util/evlist.c248
-rw-r--r--tools/perf/util/evlist.h77
-rw-r--r--tools/perf/util/evsel.c7
-rw-r--r--tools/perf/util/evsel.h4
-rw-r--r--tools/perf/util/evsel_fprintf.c37
-rw-r--r--tools/perf/util/header.c20
-rw-r--r--tools/perf/util/hist.c7
-rw-r--r--tools/perf/util/intel-bts.c6
-rw-r--r--tools/perf/util/intel-pt.c6
-rw-r--r--tools/perf/util/jit.h2
-rw-r--r--tools/perf/util/jitdump.c10
-rw-r--r--tools/perf/util/machine.c228
-rw-r--r--tools/perf/util/machine.h33
-rw-r--r--tools/perf/util/map.c34
-rw-r--r--tools/perf/util/map.h3
-rw-r--r--tools/perf/util/metricgroup.c490
-rw-r--r--tools/perf/util/metricgroup.h31
-rw-r--r--tools/perf/util/mmap.c352
-rw-r--r--tools/perf/util/mmap.h97
-rw-r--r--tools/perf/util/namespaces.c1
-rw-r--r--tools/perf/util/namespaces.h5
-rw-r--r--tools/perf/util/parse-events.c29
-rw-r--r--tools/perf/util/parse-events.h3
-rw-r--r--tools/perf/util/parse-events.l3
-rw-r--r--tools/perf/util/pmu.c55
-rw-r--r--tools/perf/util/pmu.h2
-rw-r--r--tools/perf/util/print_binary.c30
-rw-r--r--tools/perf/util/print_binary.h18
-rw-r--r--tools/perf/util/probe-file.c1
-rw-r--r--tools/perf/util/python-ext-sources1
-rw-r--r--tools/perf/util/rb_resort.h5
-rw-r--r--tools/perf/util/rwsem.c32
-rw-r--r--tools/perf/util/rwsem.h19
-rw-r--r--tools/perf/util/session.c46
-rw-r--r--tools/perf/util/session.h4
-rw-r--r--tools/perf/util/sort.c6
-rw-r--r--tools/perf/util/sort.h1
-rw-r--r--tools/perf/util/srcline.c296
-rw-r--r--tools/perf/util/srcline.h26
-rw-r--r--tools/perf/util/stat-shadow.c158
-rw-r--r--tools/perf/util/stat.c24
-rw-r--r--tools/perf/util/stat.h6
-rw-r--r--tools/perf/util/symbol.c9
-rw-r--r--tools/perf/util/symbol.h2
-rw-r--r--tools/perf/util/thread.c57
-rw-r--r--tools/perf/util/thread.h3
-rw-r--r--tools/perf/util/top.h1
-rw-r--r--tools/perf/util/trace-event-info.c1
-rw-r--r--tools/perf/util/trace-event-read.c1
-rw-r--r--tools/perf/util/util.c16
-rw-r--r--tools/perf/util/util.h7
-rw-r--r--tools/perf/util/vdso.c4
-rw-r--r--tools/perf/util/zlib.c1
162 files changed, 7869 insertions, 1780 deletions
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
index 4bf6777a8a03..9fdcd3eaac3b 100644
--- a/tools/include/linux/poison.h
+++ b/tools/include/linux/poison.h
@@ -15,6 +15,10 @@
15# define POISON_POINTER_DELTA 0 15# define POISON_POINTER_DELTA 0
16#endif 16#endif
17 17
18#ifdef __cplusplus
19#define LIST_POISON1 NULL
20#define LIST_POISON2 NULL
21#else
18/* 22/*
19 * These are non-NULL pointers that will result in page faults 23 * These are non-NULL pointers that will result in page faults
20 * under normal circumstances, used to verify that nobody uses 24 * under normal circumstances, used to verify that nobody uses
@@ -22,6 +26,7 @@
22 */ 26 */
23#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) 27#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
24#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) 28#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA)
29#endif
25 30
26/********** include/linux/timer.h **********/ 31/********** include/linux/timer.h **********/
27/* 32/*
diff --git a/tools/include/uapi/linux/kcmp.h b/tools/include/uapi/linux/kcmp.h
new file mode 100644
index 000000000000..481e103da78e
--- /dev/null
+++ b/tools/include/uapi/linux/kcmp.h
@@ -0,0 +1,27 @@
1#ifndef _UAPI_LINUX_KCMP_H
2#define _UAPI_LINUX_KCMP_H
3
4#include <linux/types.h>
5
6/* Comparison type */
7enum kcmp_type {
8 KCMP_FILE,
9 KCMP_VM,
10 KCMP_FILES,
11 KCMP_FS,
12 KCMP_SIGHAND,
13 KCMP_IO,
14 KCMP_SYSVSEM,
15 KCMP_EPOLL_TFD,
16
17 KCMP_TYPES,
18};
19
20/* Slot for KCMP_EPOLL_TFD */
21struct kcmp_epoll_slot {
22 __u32 efd; /* epoll file descriptor */
23 __u32 tfd; /* target file number */
24 __u32 toff; /* target offset within same numbered sequence */
25};
26
27#endif /* _UAPI_LINUX_KCMP_H */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
new file mode 100644
index 000000000000..a8d0759a9e40
--- /dev/null
+++ b/tools/include/uapi/linux/prctl.h
@@ -0,0 +1,200 @@
1#ifndef _LINUX_PRCTL_H
2#define _LINUX_PRCTL_H
3
4#include <linux/types.h>
5
6/* Values to pass as first argument to prctl() */
7
8#define PR_SET_PDEATHSIG 1 /* Second arg is a signal */
9#define PR_GET_PDEATHSIG 2 /* Second arg is a ptr to return the signal */
10
11/* Get/set current->mm->dumpable */
12#define PR_GET_DUMPABLE 3
13#define PR_SET_DUMPABLE 4
14
15/* Get/set unaligned access control bits (if meaningful) */
16#define PR_GET_UNALIGN 5
17#define PR_SET_UNALIGN 6
18# define PR_UNALIGN_NOPRINT 1 /* silently fix up unaligned user accesses */
19# define PR_UNALIGN_SIGBUS 2 /* generate SIGBUS on unaligned user access */
20
21/* Get/set whether or not to drop capabilities on setuid() away from
22 * uid 0 (as per security/commoncap.c) */
23#define PR_GET_KEEPCAPS 7
24#define PR_SET_KEEPCAPS 8
25
26/* Get/set floating-point emulation control bits (if meaningful) */
27#define PR_GET_FPEMU 9
28#define PR_SET_FPEMU 10
29# define PR_FPEMU_NOPRINT 1 /* silently emulate fp operations accesses */
30# define PR_FPEMU_SIGFPE 2 /* don't emulate fp operations, send SIGFPE instead */
31
32/* Get/set floating-point exception mode (if meaningful) */
33#define PR_GET_FPEXC 11
34#define PR_SET_FPEXC 12
35# define PR_FP_EXC_SW_ENABLE 0x80 /* Use FPEXC for FP exception enables */
36# define PR_FP_EXC_DIV 0x010000 /* floating point divide by zero */
37# define PR_FP_EXC_OVF 0x020000 /* floating point overflow */
38# define PR_FP_EXC_UND 0x040000 /* floating point underflow */
39# define PR_FP_EXC_RES 0x080000 /* floating point inexact result */
40# define PR_FP_EXC_INV 0x100000 /* floating point invalid operation */
41# define PR_FP_EXC_DISABLED 0 /* FP exceptions disabled */
42# define PR_FP_EXC_NONRECOV 1 /* async non-recoverable exc. mode */
43# define PR_FP_EXC_ASYNC 2 /* async recoverable exception mode */
44# define PR_FP_EXC_PRECISE 3 /* precise exception mode */
45
46/* Get/set whether we use statistical process timing or accurate timestamp
47 * based process timing */
48#define PR_GET_TIMING 13
49#define PR_SET_TIMING 14
50# define PR_TIMING_STATISTICAL 0 /* Normal, traditional,
51 statistical process timing */
52# define PR_TIMING_TIMESTAMP 1 /* Accurate timestamp based
53 process timing */
54
55#define PR_SET_NAME 15 /* Set process name */
56#define PR_GET_NAME 16 /* Get process name */
57
58/* Get/set process endian */
59#define PR_GET_ENDIAN 19
60#define PR_SET_ENDIAN 20
61# define PR_ENDIAN_BIG 0
62# define PR_ENDIAN_LITTLE 1 /* True little endian mode */
63# define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */
64
65/* Get/set process seccomp mode */
66#define PR_GET_SECCOMP 21
67#define PR_SET_SECCOMP 22
68
69/* Get/set the capability bounding set (as per security/commoncap.c) */
70#define PR_CAPBSET_READ 23
71#define PR_CAPBSET_DROP 24
72
73/* Get/set the process' ability to use the timestamp counter instruction */
74#define PR_GET_TSC 25
75#define PR_SET_TSC 26
76# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
77# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
78
79/* Get/set securebits (as per security/commoncap.c) */
80#define PR_GET_SECUREBITS 27
81#define PR_SET_SECUREBITS 28
82
83/*
84 * Get/set the timerslack as used by poll/select/nanosleep
85 * A value of 0 means "use default"
86 */
87#define PR_SET_TIMERSLACK 29
88#define PR_GET_TIMERSLACK 30
89
90#define PR_TASK_PERF_EVENTS_DISABLE 31
91#define PR_TASK_PERF_EVENTS_ENABLE 32
92
93/*
94 * Set early/late kill mode for hwpoison memory corruption.
95 * This influences when the process gets killed on a memory corruption.
96 */
97#define PR_MCE_KILL 33
98# define PR_MCE_KILL_CLEAR 0
99# define PR_MCE_KILL_SET 1
100
101# define PR_MCE_KILL_LATE 0
102# define PR_MCE_KILL_EARLY 1
103# define PR_MCE_KILL_DEFAULT 2
104
105#define PR_MCE_KILL_GET 34
106
107/*
108 * Tune up process memory map specifics.
109 */
110#define PR_SET_MM 35
111# define PR_SET_MM_START_CODE 1
112# define PR_SET_MM_END_CODE 2
113# define PR_SET_MM_START_DATA 3
114# define PR_SET_MM_END_DATA 4
115# define PR_SET_MM_START_STACK 5
116# define PR_SET_MM_START_BRK 6
117# define PR_SET_MM_BRK 7
118# define PR_SET_MM_ARG_START 8
119# define PR_SET_MM_ARG_END 9
120# define PR_SET_MM_ENV_START 10
121# define PR_SET_MM_ENV_END 11
122# define PR_SET_MM_AUXV 12
123# define PR_SET_MM_EXE_FILE 13
124# define PR_SET_MM_MAP 14
125# define PR_SET_MM_MAP_SIZE 15
126
127/*
128 * This structure provides new memory descriptor
129 * map which mostly modifies /proc/pid/stat[m]
130 * output for a task. This mostly done in a
131 * sake of checkpoint/restore functionality.
132 */
133struct prctl_mm_map {
134 __u64 start_code; /* code section bounds */
135 __u64 end_code;
136 __u64 start_data; /* data section bounds */
137 __u64 end_data;
138 __u64 start_brk; /* heap for brk() syscall */
139 __u64 brk;
140 __u64 start_stack; /* stack starts at */
141 __u64 arg_start; /* command line arguments bounds */
142 __u64 arg_end;
143 __u64 env_start; /* environment variables bounds */
144 __u64 env_end;
145 __u64 *auxv; /* auxiliary vector */
146 __u32 auxv_size; /* vector size */
147 __u32 exe_fd; /* /proc/$pid/exe link file */
148};
149
150/*
151 * Set specific pid that is allowed to ptrace the current task.
152 * A value of 0 mean "no process".
153 */
154#define PR_SET_PTRACER 0x59616d61
155# define PR_SET_PTRACER_ANY ((unsigned long)-1)
156
157#define PR_SET_CHILD_SUBREAPER 36
158#define PR_GET_CHILD_SUBREAPER 37
159
160/*
161 * If no_new_privs is set, then operations that grant new privileges (i.e.
162 * execve) will either fail or not grant them. This affects suid/sgid,
163 * file capabilities, and LSMs.
164 *
165 * Operations that merely manipulate or drop existing privileges (setresuid,
166 * capset, etc.) will still work. Drop those privileges if you want them gone.
167 *
168 * Changing LSM security domain is considered a new privilege. So, for example,
169 * asking selinux for a specific new context (e.g. with runcon) will result
170 * in execve returning -EPERM.
171 *
172 * See Documentation/prctl/no_new_privs.txt for more details.
173 */
174#define PR_SET_NO_NEW_PRIVS 38
175#define PR_GET_NO_NEW_PRIVS 39
176
177#define PR_GET_TID_ADDRESS 40
178
179#define PR_SET_THP_DISABLE 41
180#define PR_GET_THP_DISABLE 42
181
182/*
183 * Tell the kernel to start/stop helping userspace manage bounds tables.
184 */
185#define PR_MPX_ENABLE_MANAGEMENT 43
186#define PR_MPX_DISABLE_MANAGEMENT 44
187
188#define PR_SET_FP_MODE 45
189#define PR_GET_FP_MODE 46
190# define PR_FP_MODE_FR (1 << 0) /* 64b FP registers */
191# define PR_FP_MODE_FRE (1 << 1) /* 32b compatibility */
192
193/* Control the ambient capability set */
194#define PR_CAP_AMBIENT 47
195# define PR_CAP_AMBIENT_IS_SET 1
196# define PR_CAP_AMBIENT_RAISE 2
197# define PR_CAP_AMBIENT_LOWER 3
198# define PR_CAP_AMBIENT_CLEAR_ALL 4
199
200#endif /* _LINUX_PRCTL_H */
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index f709de54707b..e2a897ae3596 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,7 +8,8 @@ perf-list - List all symbolic event types
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob] 11'perf list' [--no-desc] [--long-desc]
12 [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]
12 13
13DESCRIPTION 14DESCRIPTION
14----------- 15-----------
@@ -47,6 +48,8 @@ counted. The following modifiers exist:
47 P - use maximum detected precise level 48 P - use maximum detected precise level
48 S - read sample value (PERF_SAMPLE_READ) 49 S - read sample value (PERF_SAMPLE_READ)
49 D - pin the event to the PMU 50 D - pin the event to the PMU
51 W - group is weak and will fallback to non-group if not schedulable,
52 only supported in 'perf stat' for now.
50 53
51The 'p' modifier can be used for specifying how precise the instruction 54The 'p' modifier can be used for specifying how precise the instruction
52address should be. The 'p' modifier can be specified multiple times: 55address should be. The 'p' modifier can be specified multiple times:
@@ -201,7 +204,7 @@ For example Intel Core CPUs typically have four generic performance counters
201for the core, plus three fixed counters for instructions, cycles and 204for the core, plus three fixed counters for instructions, cycles and
202ref-cycles. Some special events have restrictions on which counter they 205ref-cycles. Some special events have restrictions on which counter they
203can schedule, and may not support multiple instances in a single group. 206can schedule, and may not support multiple instances in a single group.
204When too many events are specified in the group none of them will not 207When too many events are specified in the group some of them will not
205be measured. 208be measured.
206 209
207Globally pinned events can limit the number of counters available for 210Globally pinned events can limit the number of counters available for
@@ -246,6 +249,10 @@ To limit the list use:
246 249
247. 'sdt' to list all Statically Defined Tracepoint events. 250. 'sdt' to list all Statically Defined Tracepoint events.
248 251
252. 'metric' to list metrics
253
254. 'metricgroup' to list metricgroups with metrics.
255
249. If none of the above is matched, it will apply the supplied glob to all 256. If none of the above is matched, it will apply the supplied glob to all
250 events, printing the ones that match. 257 events, printing the ones that match.
251 258
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 63526f4416ea..5a626ef666c2 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -377,6 +377,8 @@ symbolic names, e.g. on x86, ax, si. To list the available registers use
377--intr-regs=\?. To name registers, pass a comma separated list such as 377--intr-regs=\?. To name registers, pass a comma separated list such as
378--intr-regs=ax,bx. The list of register is architecture dependent. 378--intr-regs=ax,bx. The list of register is architecture dependent.
379 379
380--user-regs::
381Capture user registers at sample time. Same arguments as -I.
380 382
381--running-time:: 383--running-time::
382Record running and enabled time for read events (:S) 384Record running and enabled time for read events (:S)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 383a98d992ed..ddde2b54af57 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -434,7 +434,8 @@ include::itrace.txt[]
434 434
435--inline:: 435--inline::
436 If a callgraph address belongs to an inlined function, the inline stack 436 If a callgraph address belongs to an inlined function, the inline stack
437 will be printed. Each entry is function name or file/line. 437 will be printed. Each entry is function name or file/line. Enabled by
438 default, disable with --no-inline.
438 439
439include::callchain-overhead-calculation.txt[] 440include::callchain-overhead-calculation.txt[]
440 441
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index a092a2499e8f..55b67338548e 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -106,6 +106,14 @@ OPTIONS for 'perf sched timehist'
106--max-stack:: 106--max-stack::
107 Maximum number of functions to display in backtrace, default 5. 107 Maximum number of functions to display in backtrace, default 5.
108 108
109-p=::
110--pid=::
111 Only show events for given process ID (comma separated list).
112
113-t=::
114--tid=::
115 Only show events for given thread ID (comma separated list).
116
109-s:: 117-s::
110--summary:: 118--summary::
111 Show only a summary of scheduling by thread with min, max, and average 119 Show only a summary of scheduling by thread with min, max, and average
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 18dfcfa38454..2811fcf684cb 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,8 +116,8 @@ OPTIONS
116--fields:: 116--fields::
117 Comma separated list of fields to print. Options are: 117 Comma separated list of fields to print. Options are:
118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
119 srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, 119 srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
120 callindent, insn, insnlen, synth, phys_addr. 120 brstackoff, callindent, insn, insnlen, synth, phys_addr.
121 Field list can be prepended with the type, trace, sw or hw, 121 Field list can be prepended with the type, trace, sw or hw,
122 to indicate to which event type the field list applies. 122 to indicate to which event type the field list applies.
123 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace 123 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
@@ -325,9 +325,14 @@ include::itrace.txt[]
325 Set the maximum number of program blocks to print with brstackasm for 325 Set the maximum number of program blocks to print with brstackasm for
326 each sample. 326 each sample.
327 327
328--per-event-dump::
329 Create per event files with a "perf.data.EVENT.dump" name instead of
330 printing to stdout, useful, for instance, for generating flamegraphs.
331
328--inline:: 332--inline::
329 If a callgraph address belongs to an inlined function, the inline stack 333 If a callgraph address belongs to an inlined function, the inline stack
330 will be printed. Each entry has function name and file/line. 334 will be printed. Each entry has function name and file/line. Enabled by
335 default, disable with --no-inline.
331 336
332SEE ALSO 337SEE ALSO
333-------- 338--------
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index c37d61682dfb..823fce7674bb 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -199,6 +199,13 @@ Aggregate counts per processor socket for system-wide mode measurements.
199--per-core:: 199--per-core::
200Aggregate counts per physical processor for system-wide mode measurements. 200Aggregate counts per physical processor for system-wide mode measurements.
201 201
202-M::
203--metrics::
204Print metrics or metricgroups specified in a comma separated list.
205For a group all metrics from the group are added.
206The events from the metrics are automatically measured.
207See perf list output for the possble metrics and metricgroups.
208
202-A:: 209-A::
203--no-aggr:: 210--no-aggr::
204Do not aggregate counts across all monitored CPUs. 211Do not aggregate counts across all monitored CPUs.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index d864ea6fd367..4353262bc462 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -240,6 +240,9 @@ Default is to monitor all CPUS.
240--force:: 240--force::
241 Don't do ownership validation. 241 Don't do ownership validation.
242 242
243--num-thread-synthesize::
244 The number of threads to run when synthesizing events for existing processes.
245 By default, the number of threads equals to the number of online CPUs.
243 246
244INTERACTIVE PROMPTING KEYS 247INTERACTIVE PROMPTING KEYS
245-------------------------- 248--------------------------
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 91ef44bfaf3e..68cf1360a3f3 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -173,7 +173,7 @@ AWK = awk
173# non-config cases 173# non-config cases
174config := 1 174config := 1
175 175
176NON_CONFIG_TARGETS := clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf 176NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
177 177
178ifdef MAKECMDGOALS 178ifdef MAKECMDGOALS
179ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) 179ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
@@ -420,6 +420,13 @@ sndrv_pcm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
420$(sndrv_pcm_ioctl_array): $(sndrv_pcm_hdr_dir)/asound.h $(sndrv_pcm_ioctl_tbl) 420$(sndrv_pcm_ioctl_array): $(sndrv_pcm_hdr_dir)/asound.h $(sndrv_pcm_ioctl_tbl)
421 $(Q)$(SHELL) '$(sndrv_pcm_ioctl_tbl)' $(sndrv_pcm_hdr_dir) > $@ 421 $(Q)$(SHELL) '$(sndrv_pcm_ioctl_tbl)' $(sndrv_pcm_hdr_dir) > $@
422 422
423kcmp_type_array := $(beauty_outdir)/kcmp_type_array.c
424kcmp_hdr_dir := $(srctree)/tools/include/uapi/linux/
425kcmp_type_tbl := $(srctree)/tools/perf/trace/beauty/kcmp_type.sh
426
427$(kcmp_type_array): $(kcmp_hdr_dir)/kcmp.h $(kcmp_type_tbl)
428 $(Q)$(SHELL) '$(kcmp_type_tbl)' $(kcmp_hdr_dir) > $@
429
423kvm_ioctl_array := $(beauty_ioctl_outdir)/kvm_ioctl_array.c 430kvm_ioctl_array := $(beauty_ioctl_outdir)/kvm_ioctl_array.c
424kvm_hdr_dir := $(srctree)/tools/include/uapi/linux 431kvm_hdr_dir := $(srctree)/tools/include/uapi/linux
425kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh 432kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh
@@ -441,6 +448,20 @@ perf_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/perf_ioctl.sh
441$(perf_ioctl_array): $(perf_hdr_dir)/perf_event.h $(perf_ioctl_tbl) 448$(perf_ioctl_array): $(perf_hdr_dir)/perf_event.h $(perf_ioctl_tbl)
442 $(Q)$(SHELL) '$(perf_ioctl_tbl)' $(perf_hdr_dir) > $@ 449 $(Q)$(SHELL) '$(perf_ioctl_tbl)' $(perf_hdr_dir) > $@
443 450
451madvise_behavior_array := $(beauty_outdir)/madvise_behavior_array.c
452madvise_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
453madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
454
455$(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
456 $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
457
458prctl_option_array := $(beauty_outdir)/prctl_option_array.c
459prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
460prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
461
462$(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl)
463 $(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@
464
444all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) 465all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
445 466
446$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) 467$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
@@ -539,9 +560,12 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
539 $(pkey_alloc_access_rights_array) \ 560 $(pkey_alloc_access_rights_array) \
540 $(sndrv_pcm_ioctl_array) \ 561 $(sndrv_pcm_ioctl_array) \
541 $(sndrv_ctl_ioctl_array) \ 562 $(sndrv_ctl_ioctl_array) \
563 $(kcmp_type_array) \
542 $(kvm_ioctl_array) \ 564 $(kvm_ioctl_array) \
543 $(vhost_virtio_ioctl_array) \ 565 $(vhost_virtio_ioctl_array) \
544 $(perf_ioctl_array) 566 $(madvise_behavior_array) \
567 $(perf_ioctl_array) \
568 $(prctl_option_array)
545 569
546$(OUTPUT)%.o: %.c prepare FORCE 570$(OUTPUT)%.o: %.c prepare FORCE
547 $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ 571 $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@@ -802,7 +826,10 @@ config-clean:
802 $(call QUIET_CLEAN, config) 826 $(call QUIET_CLEAN, config)
803 $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null 827 $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
804 828
805clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean 829python-clean:
830 $(python-clean)
831
832clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean
806 $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) 833 $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
807 $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete 834 $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
808 $(Q)$(RM) $(OUTPUT).config-detected 835 $(Q)$(RM) $(OUTPUT).config-detected
@@ -811,15 +838,17 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
811 $(OUTPUT)util/intel-pt-decoder/inat-tables.c \ 838 $(OUTPUT)util/intel-pt-decoder/inat-tables.c \
812 $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ 839 $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
813 $(OUTPUT)pmu-events/pmu-events.c \ 840 $(OUTPUT)pmu-events/pmu-events.c \
841 $(OUTPUT)$(madvise_behavior_array) \
814 $(OUTPUT)$(drm_ioctl_array) \ 842 $(OUTPUT)$(drm_ioctl_array) \
815 $(OUTPUT)$(pkey_alloc_access_rights_array) \ 843 $(OUTPUT)$(pkey_alloc_access_rights_array) \
816 $(OUTPUT)$(sndrv_ctl_ioctl_array) \ 844 $(OUTPUT)$(sndrv_ctl_ioctl_array) \
817 $(OUTPUT)$(sndrv_pcm_ioctl_array) \ 845 $(OUTPUT)$(sndrv_pcm_ioctl_array) \
818 $(OUTPUT)$(kvm_ioctl_array) \ 846 $(OUTPUT)$(kvm_ioctl_array) \
847 $(OUTPUT)$(kcmp_type_array) \
819 $(OUTPUT)$(vhost_virtio_ioctl_array) \ 848 $(OUTPUT)$(vhost_virtio_ioctl_array) \
820 $(OUTPUT)$(perf_ioctl_array) 849 $(OUTPUT)$(perf_ioctl_array) \
850 $(OUTPUT)$(prctl_option_array)
821 $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean 851 $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
822 $(python-clean)
823 852
824# 853#
825# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) 854# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c
index b39b16395aac..f64516d5b23e 100644
--- a/tools/perf/arch/arm/annotate/instructions.c
+++ b/tools/perf/arch/arm/annotate/instructions.c
@@ -1,4 +1,5 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/compiler.h>
2#include <sys/types.h> 3#include <sys/types.h>
3#include <regex.h> 4#include <regex.h>
4 5
@@ -24,7 +25,7 @@ static struct ins_ops *arm__associate_instruction_ops(struct arch *arch, const c
24 return ops; 25 return ops;
25} 26}
26 27
27static int arm__annotate_init(struct arch *arch) 28static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
28{ 29{
29 struct arm_annotate *arm; 30 struct arm_annotate *arm;
30 int err; 31 int err;
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index 9a3e0523e2c9..6688977e4ac7 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -1,4 +1,5 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/compiler.h>
2#include <sys/types.h> 3#include <sys/types.h>
3#include <regex.h> 4#include <regex.h>
4 5
@@ -26,7 +27,7 @@ static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const
26 return ops; 27 return ops;
27} 28}
28 29
29static int arm64__annotate_init(struct arch *arch) 30static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
30{ 31{
31 struct arm64_annotate *arm; 32 struct arm64_annotate *arm;
32 int err; 33 int err;
diff --git a/tools/perf/arch/powerpc/annotate/instructions.c b/tools/perf/arch/powerpc/annotate/instructions.c
index b7bc04980fe8..a3f423c27cae 100644
--- a/tools/perf/arch/powerpc/annotate/instructions.c
+++ b/tools/perf/arch/powerpc/annotate/instructions.c
@@ -1,4 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/compiler.h>
3
2static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, const char *name) 4static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, const char *name)
3{ 5{
4 int i; 6 int i;
@@ -47,7 +49,7 @@ static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, con
47 return ops; 49 return ops;
48} 50}
49 51
50static int powerpc__annotate_init(struct arch *arch) 52static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
51{ 53{
52 if (!arch->initialized) { 54 if (!arch->initialized) {
53 arch->initialized = true; 55 arch->initialized = true;
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index c9a81673e8aa..e0e466c650df 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -1,4 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/compiler.h>
3
2static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name) 4static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
3{ 5{
4 struct ins_ops *ops = NULL; 6 struct ins_ops *ops = NULL;
@@ -20,7 +22,7 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na
20 return ops; 22 return ops;
21} 23}
22 24
23static int s390__annotate_init(struct arch *arch) 25static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
24{ 26{
25 if (!arch->initialized) { 27 if (!arch->initialized) {
26 arch->initialized = true; 28 arch->initialized = true;
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 4adfb4ce2864..5bd1ba8c0282 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -123,3 +123,17 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid)
123 123
124 return -1; 124 return -1;
125} 125}
126
127static int x86__annotate_init(struct arch *arch, char *cpuid)
128{
129 int err = 0;
130
131 if (arch->initialized)
132 return 0;
133
134 if (cpuid)
135 err = x86__cpuid_parse(arch, cpuid);
136
137 arch->initialized = true;
138 return err;
139}
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 9834fdc7c59e..c1bd979b957b 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -9,7 +9,6 @@ struct test;
9int test__rdpmc(struct test *test __maybe_unused, int subtest); 9int test__rdpmc(struct test *test __maybe_unused, int subtest);
10int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); 10int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
11int test__insn_x86(struct test *test __maybe_unused, int subtest); 11int test__insn_x86(struct test *test __maybe_unused, int subtest);
12int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest);
13 12
14#ifdef HAVE_DWARF_UNWIND_SUPPORT 13#ifdef HAVE_DWARF_UNWIND_SUPPORT
15struct thread; 14struct thread;
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index cbb7e978166b..8e2c5a38c3b9 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -5,4 +5,3 @@ libperf-y += arch-tests.o
5libperf-y += rdpmc.o 5libperf-y += rdpmc.o
6libperf-y += perf-time-to-tsc.o 6libperf-y += perf-time-to-tsc.o
7libperf-$(CONFIG_AUXTRACE) += insn-x86.o 7libperf-$(CONFIG_AUXTRACE) += insn-x86.o
8libperf-y += intel-cqm.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 34a078136a47..cc1802ff5410 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -25,10 +25,6 @@ struct test arch_tests[] = {
25 }, 25 },
26#endif 26#endif
27 { 27 {
28 .desc = "Intel cqm nmi context read",
29 .func = test__intel_cqm_count_nmi_context,
30 },
31 {
32 .func = NULL, 28 .func = NULL,
33 }, 29 },
34 30
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 3d32aa45016d..f15731a3d438 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -357,7 +357,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
357 } 357 }
358 358
359 if (total_nr_samples == 0) { 359 if (total_nr_samples == 0) {
360 ui__error("The %s file has no samples!\n", session->file->path); 360 ui__error("The %s file has no samples!\n", session->data->file.path);
361 goto out; 361 goto out;
362 } 362 }
363 363
@@ -401,7 +401,7 @@ int cmd_annotate(int argc, const char **argv)
401 .ordering_requires_timestamps = true, 401 .ordering_requires_timestamps = true,
402 }, 402 },
403 }; 403 };
404 struct perf_data_file file = { 404 struct perf_data data = {
405 .mode = PERF_DATA_MODE_READ, 405 .mode = PERF_DATA_MODE_READ,
406 }; 406 };
407 struct option options[] = { 407 struct option options[] = {
@@ -411,7 +411,7 @@ int cmd_annotate(int argc, const char **argv)
411 "only consider symbols in these dsos"), 411 "only consider symbols in these dsos"),
412 OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol", 412 OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
413 "symbol to annotate"), 413 "symbol to annotate"),
414 OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), 414 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
415 OPT_INCR('v', "verbose", &verbose, 415 OPT_INCR('v', "verbose", &verbose,
416 "be more verbose (show symbol address, etc)"), 416 "be more verbose (show symbol address, etc)"),
417 OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"), 417 OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"),
@@ -483,9 +483,9 @@ int cmd_annotate(int argc, const char **argv)
483 if (quiet) 483 if (quiet)
484 perf_quiet_option(); 484 perf_quiet_option();
485 485
486 file.path = input_name; 486 data.file.path = input_name;
487 487
488 annotate.session = perf_session__new(&file, false, &annotate.tool); 488 annotate.session = perf_session__new(&data, false, &annotate.tool);
489 if (annotate.session == NULL) 489 if (annotate.session == NULL)
490 return -1; 490 return -1;
491 491
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 5f53a7ad5ef3..3d354ba6e9c5 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -312,7 +312,7 @@ int cmd_buildid_cache(int argc, const char **argv)
312 *kcore_filename = NULL; 312 *kcore_filename = NULL;
313 char sbuf[STRERR_BUFSIZE]; 313 char sbuf[STRERR_BUFSIZE];
314 314
315 struct perf_data_file file = { 315 struct perf_data data = {
316 .mode = PERF_DATA_MODE_READ, 316 .mode = PERF_DATA_MODE_READ,
317 }; 317 };
318 struct perf_session *session = NULL; 318 struct perf_session *session = NULL;
@@ -353,10 +353,10 @@ int cmd_buildid_cache(int argc, const char **argv)
353 nsi = nsinfo__new(ns_id); 353 nsi = nsinfo__new(ns_id);
354 354
355 if (missing_filename) { 355 if (missing_filename) {
356 file.path = missing_filename; 356 data.file.path = missing_filename;
357 file.force = force; 357 data.force = force;
358 358
359 session = perf_session__new(&file, false, NULL); 359 session = perf_session__new(&data, false, NULL);
360 if (session == NULL) 360 if (session == NULL)
361 return -1; 361 return -1;
362 } 362 }
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index ec2f327cd79d..78abbe8d9d5f 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -51,10 +51,12 @@ static bool dso__skip_buildid(struct dso *dso, int with_hits)
51static int perf_session__list_build_ids(bool force, bool with_hits) 51static int perf_session__list_build_ids(bool force, bool with_hits)
52{ 52{
53 struct perf_session *session; 53 struct perf_session *session;
54 struct perf_data_file file = { 54 struct perf_data data = {
55 .path = input_name, 55 .file = {
56 .mode = PERF_DATA_MODE_READ, 56 .path = input_name,
57 .force = force, 57 },
58 .mode = PERF_DATA_MODE_READ,
59 .force = force,
58 }; 60 };
59 61
60 symbol__elf_init(); 62 symbol__elf_init();
@@ -64,7 +66,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
64 if (filename__fprintf_build_id(input_name, stdout) > 0) 66 if (filename__fprintf_build_id(input_name, stdout) > 0)
65 goto out; 67 goto out;
66 68
67 session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); 69 session = perf_session__new(&data, false, &build_id__mark_dso_hit_ops);
68 if (session == NULL) 70 if (session == NULL)
69 return -1; 71 return -1;
70 72
@@ -72,7 +74,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
72 * We take all buildids when the file contains AUX area tracing data 74 * We take all buildids when the file contains AUX area tracing data
73 * because we do not decode the trace because it would take too long. 75 * because we do not decode the trace because it would take too long.
74 */ 76 */
75 if (!perf_data_file__is_pipe(&file) && 77 if (!perf_data__is_pipe(&data) &&
76 perf_header__has_feat(&session->header, HEADER_AUXTRACE)) 78 perf_header__has_feat(&session->header, HEADER_AUXTRACE))
77 with_hits = false; 79 with_hits = false;
78 80
@@ -80,7 +82,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
80 * in pipe-mode, the only way to get the buildids is to parse 82 * in pipe-mode, the only way to get the buildids is to parse
81 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID 83 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
82 */ 84 */
83 if (with_hits || perf_data_file__is_pipe(&file)) 85 if (with_hits || perf_data__is_pipe(&data))
84 perf_session__process_events(session); 86 perf_session__process_events(session);
85 87
86 perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits); 88 perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits);
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index fd32ad08c6d4..17855c4626a0 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2524,7 +2524,7 @@ static int perf_c2c__report(int argc, const char **argv)
2524{ 2524{
2525 struct perf_session *session; 2525 struct perf_session *session;
2526 struct ui_progress prog; 2526 struct ui_progress prog;
2527 struct perf_data_file file = { 2527 struct perf_data data = {
2528 .mode = PERF_DATA_MODE_READ, 2528 .mode = PERF_DATA_MODE_READ,
2529 }; 2529 };
2530 char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; 2530 char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
@@ -2573,8 +2573,8 @@ static int perf_c2c__report(int argc, const char **argv)
2573 if (!input_name || !strlen(input_name)) 2573 if (!input_name || !strlen(input_name))
2574 input_name = "perf.data"; 2574 input_name = "perf.data";
2575 2575
2576 file.path = input_name; 2576 data.file.path = input_name;
2577 file.force = symbol_conf.force; 2577 data.force = symbol_conf.force;
2578 2578
2579 err = setup_display(display); 2579 err = setup_display(display);
2580 if (err) 2580 if (err)
@@ -2592,7 +2592,7 @@ static int perf_c2c__report(int argc, const char **argv)
2592 goto out; 2592 goto out;
2593 } 2593 }
2594 2594
2595 session = perf_session__new(&file, 0, &c2c.tool); 2595 session = perf_session__new(&data, 0, &c2c.tool);
2596 if (session == NULL) { 2596 if (session == NULL) {
2597 pr_debug("No memory for session\n"); 2597 pr_debug("No memory for session\n");
2598 goto out; 2598 goto out;
@@ -2612,7 +2612,7 @@ static int perf_c2c__report(int argc, const char **argv)
2612 goto out_session; 2612 goto out_session;
2613 2613
2614 /* No pipe support at the moment. */ 2614 /* No pipe support at the moment. */
2615 if (perf_data_file__is_pipe(session->file)) { 2615 if (perf_data__is_pipe(session->data)) {
2616 pr_debug("No pipe support at the moment.\n"); 2616 pr_debug("No pipe support at the moment.\n");
2617 goto out_session; 2617 goto out_session;
2618 } 2618 }
@@ -2733,6 +2733,7 @@ static int perf_c2c__record(int argc, const char **argv)
2733 if (!perf_mem_events[j].supported) { 2733 if (!perf_mem_events[j].supported) {
2734 pr_err("failed: event '%s' not supported\n", 2734 pr_err("failed: event '%s' not supported\n",
2735 perf_mem_events[j].name); 2735 perf_mem_events[j].name);
2736 free(rec_argv);
2736 return -1; 2737 return -1;
2737 } 2738 }
2738 2739
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index abfa49eaf7fd..514f70f95b57 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -35,8 +35,7 @@ static struct option config_options[] = {
35 OPT_END() 35 OPT_END()
36}; 36};
37 37
38static int set_config(struct perf_config_set *set, const char *file_name, 38static int set_config(struct perf_config_set *set, const char *file_name)
39 const char *var, const char *value)
40{ 39{
41 struct perf_config_section *section = NULL; 40 struct perf_config_section *section = NULL;
42 struct perf_config_item *item = NULL; 41 struct perf_config_item *item = NULL;
@@ -50,7 +49,6 @@ static int set_config(struct perf_config_set *set, const char *file_name,
50 if (!fp) 49 if (!fp)
51 return -1; 50 return -1;
52 51
53 perf_config_set__collect(set, file_name, var, value);
54 fprintf(fp, "%s\n", first_line); 52 fprintf(fp, "%s\n", first_line);
55 53
56 /* overwrite configvariables */ 54 /* overwrite configvariables */
@@ -162,6 +160,7 @@ int cmd_config(int argc, const char **argv)
162 struct perf_config_set *set; 160 struct perf_config_set *set;
163 char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); 161 char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
164 const char *config_filename; 162 const char *config_filename;
163 bool changed = false;
165 164
166 argc = parse_options(argc, argv, config_options, config_usage, 165 argc = parse_options(argc, argv, config_options, config_usage,
167 PARSE_OPT_STOP_AT_NON_OPTION); 166 PARSE_OPT_STOP_AT_NON_OPTION);
@@ -232,15 +231,26 @@ int cmd_config(int argc, const char **argv)
232 goto out_err; 231 goto out_err;
233 } 232 }
234 } else { 233 } else {
235 if (set_config(set, config_filename, var, value) < 0) { 234 if (perf_config_set__collect(set, config_filename,
236 pr_err("Failed to set '%s=%s' on %s\n", 235 var, value) < 0) {
237 var, value, config_filename); 236 pr_err("Failed to add '%s=%s'\n",
237 var, value);
238 free(arg); 238 free(arg);
239 goto out_err; 239 goto out_err;
240 } 240 }
241 changed = true;
241 } 242 }
242 free(arg); 243 free(arg);
243 } 244 }
245
246 if (!changed)
247 break;
248
249 if (set_config(set, config_filename) < 0) {
250 pr_err("Failed to set the configs on %s\n",
251 config_filename);
252 goto out_err;
253 }
244 } 254 }
245 255
246 ret = 0; 256 ret = 0;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 56223bdfa205..d660cb7b222b 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -48,7 +48,7 @@ struct diff_hpp_fmt {
48 48
49struct data__file { 49struct data__file {
50 struct perf_session *session; 50 struct perf_session *session;
51 struct perf_data_file file; 51 struct perf_data data;
52 int idx; 52 int idx;
53 struct hists *hists; 53 struct hists *hists;
54 struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX]; 54 struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX];
@@ -708,7 +708,7 @@ static void data__fprintf(void)
708 708
709 data__for_each_file(i, d) 709 data__for_each_file(i, d)
710 fprintf(stdout, "# [%d] %s %s\n", 710 fprintf(stdout, "# [%d] %s %s\n",
711 d->idx, d->file.path, 711 d->idx, d->data.file.path,
712 !d->idx ? "(Baseline)" : ""); 712 !d->idx ? "(Baseline)" : "");
713 713
714 fprintf(stdout, "#\n"); 714 fprintf(stdout, "#\n");
@@ -777,16 +777,16 @@ static int __cmd_diff(void)
777 int ret = -EINVAL, i; 777 int ret = -EINVAL, i;
778 778
779 data__for_each_file(i, d) { 779 data__for_each_file(i, d) {
780 d->session = perf_session__new(&d->file, false, &tool); 780 d->session = perf_session__new(&d->data, false, &tool);
781 if (!d->session) { 781 if (!d->session) {
782 pr_err("Failed to open %s\n", d->file.path); 782 pr_err("Failed to open %s\n", d->data.file.path);
783 ret = -1; 783 ret = -1;
784 goto out_delete; 784 goto out_delete;
785 } 785 }
786 786
787 ret = perf_session__process_events(d->session); 787 ret = perf_session__process_events(d->session);
788 if (ret) { 788 if (ret) {
789 pr_err("Failed to process %s\n", d->file.path); 789 pr_err("Failed to process %s\n", d->data.file.path);
790 goto out_delete; 790 goto out_delete;
791 } 791 }
792 792
@@ -1287,11 +1287,11 @@ static int data_init(int argc, const char **argv)
1287 return -ENOMEM; 1287 return -ENOMEM;
1288 1288
1289 data__for_each_file(i, d) { 1289 data__for_each_file(i, d) {
1290 struct perf_data_file *file = &d->file; 1290 struct perf_data *data = &d->data;
1291 1291
1292 file->path = use_default ? defaults[i] : argv[i]; 1292 data->file.path = use_default ? defaults[i] : argv[i];
1293 file->mode = PERF_DATA_MODE_READ, 1293 data->mode = PERF_DATA_MODE_READ,
1294 file->force = force, 1294 data->force = force,
1295 1295
1296 d->idx = i; 1296 d->idx = i;
1297 } 1297 }
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index cdd145613f60..e06e822ce634 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -22,14 +22,16 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
22{ 22{
23 struct perf_session *session; 23 struct perf_session *session;
24 struct perf_evsel *pos; 24 struct perf_evsel *pos;
25 struct perf_data_file file = { 25 struct perf_data data = {
26 .path = file_name, 26 .file = {
27 .mode = PERF_DATA_MODE_READ, 27 .path = file_name,
28 .force = details->force, 28 },
29 .mode = PERF_DATA_MODE_READ,
30 .force = details->force,
29 }; 31 };
30 bool has_tracepoint = false; 32 bool has_tracepoint = false;
31 33
32 session = perf_session__new(&file, 0, NULL); 34 session = perf_session__new(&data, 0, NULL);
33 if (session == NULL) 35 if (session == NULL)
34 return -1; 36 return -1;
35 37
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 3e0e73b0dc67..16a28547ca86 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -36,7 +36,7 @@ struct perf_inject {
36 bool strip; 36 bool strip;
37 bool jit_mode; 37 bool jit_mode;
38 const char *input_name; 38 const char *input_name;
39 struct perf_data_file output; 39 struct perf_data output;
40 u64 bytes_written; 40 u64 bytes_written;
41 u64 aux_id; 41 u64 aux_id;
42 struct list_head samples; 42 struct list_head samples;
@@ -53,7 +53,7 @@ static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
53{ 53{
54 ssize_t size; 54 ssize_t size;
55 55
56 size = perf_data_file__write(&inject->output, buf, sz); 56 size = perf_data__write(&inject->output, buf, sz);
57 if (size < 0) 57 if (size < 0)
58 return -errno; 58 return -errno;
59 59
@@ -146,7 +146,7 @@ static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
146 if (!inject->output.is_pipe) { 146 if (!inject->output.is_pipe) {
147 off_t offset; 147 off_t offset;
148 148
149 offset = lseek(inject->output.fd, 0, SEEK_CUR); 149 offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
150 if (offset == -1) 150 if (offset == -1)
151 return -errno; 151 return -errno;
152 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index, 152 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
@@ -155,11 +155,11 @@ static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
155 return ret; 155 return ret;
156 } 156 }
157 157
158 if (perf_data_file__is_pipe(session->file) || !session->one_mmap) { 158 if (perf_data__is_pipe(session->data) || !session->one_mmap) {
159 ret = output_bytes(inject, event, event->header.size); 159 ret = output_bytes(inject, event, event->header.size);
160 if (ret < 0) 160 if (ret < 0)
161 return ret; 161 return ret;
162 ret = copy_bytes(inject, perf_data_file__fd(session->file), 162 ret = copy_bytes(inject, perf_data__fd(session->data),
163 event->auxtrace.size); 163 event->auxtrace.size);
164 } else { 164 } else {
165 ret = output_bytes(inject, event, 165 ret = output_bytes(inject, event,
@@ -638,8 +638,8 @@ static int __cmd_inject(struct perf_inject *inject)
638{ 638{
639 int ret = -EINVAL; 639 int ret = -EINVAL;
640 struct perf_session *session = inject->session; 640 struct perf_session *session = inject->session;
641 struct perf_data_file *file_out = &inject->output; 641 struct perf_data *data_out = &inject->output;
642 int fd = perf_data_file__fd(file_out); 642 int fd = perf_data__fd(data_out);
643 u64 output_data_offset; 643 u64 output_data_offset;
644 644
645 signal(SIGINT, sig_handler); 645 signal(SIGINT, sig_handler);
@@ -694,14 +694,14 @@ static int __cmd_inject(struct perf_inject *inject)
694 if (!inject->itrace_synth_opts.set) 694 if (!inject->itrace_synth_opts.set)
695 auxtrace_index__free(&session->auxtrace_index); 695 auxtrace_index__free(&session->auxtrace_index);
696 696
697 if (!file_out->is_pipe) 697 if (!data_out->is_pipe)
698 lseek(fd, output_data_offset, SEEK_SET); 698 lseek(fd, output_data_offset, SEEK_SET);
699 699
700 ret = perf_session__process_events(session); 700 ret = perf_session__process_events(session);
701 if (ret) 701 if (ret)
702 return ret; 702 return ret;
703 703
704 if (!file_out->is_pipe) { 704 if (!data_out->is_pipe) {
705 if (inject->build_ids) 705 if (inject->build_ids)
706 perf_header__set_feat(&session->header, 706 perf_header__set_feat(&session->header,
707 HEADER_BUILD_ID); 707 HEADER_BUILD_ID);
@@ -776,11 +776,13 @@ int cmd_inject(int argc, const char **argv)
776 .input_name = "-", 776 .input_name = "-",
777 .samples = LIST_HEAD_INIT(inject.samples), 777 .samples = LIST_HEAD_INIT(inject.samples),
778 .output = { 778 .output = {
779 .path = "-", 779 .file = {
780 .mode = PERF_DATA_MODE_WRITE, 780 .path = "-",
781 },
782 .mode = PERF_DATA_MODE_WRITE,
781 }, 783 },
782 }; 784 };
783 struct perf_data_file file = { 785 struct perf_data data = {
784 .mode = PERF_DATA_MODE_READ, 786 .mode = PERF_DATA_MODE_READ,
785 }; 787 };
786 int ret; 788 int ret;
@@ -790,7 +792,7 @@ int cmd_inject(int argc, const char **argv)
790 "Inject build-ids into the output stream"), 792 "Inject build-ids into the output stream"),
791 OPT_STRING('i', "input", &inject.input_name, "file", 793 OPT_STRING('i', "input", &inject.input_name, "file",
792 "input file name"), 794 "input file name"),
793 OPT_STRING('o', "output", &inject.output.path, "file", 795 OPT_STRING('o', "output", &inject.output.file.path, "file",
794 "output file name"), 796 "output file name"),
795 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, 797 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
796 "Merge sched-stat and sched-switch for getting events " 798 "Merge sched-stat and sched-switch for getting events "
@@ -802,7 +804,7 @@ int cmd_inject(int argc, const char **argv)
802 "be more verbose (show build ids, etc)"), 804 "be more verbose (show build ids, etc)"),
803 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", 805 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
804 "kallsyms pathname"), 806 "kallsyms pathname"),
805 OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), 807 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
806 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, 808 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
807 NULL, "opts", "Instruction Tracing options", 809 NULL, "opts", "Instruction Tracing options",
808 itrace_parse_synth_opts), 810 itrace_parse_synth_opts),
@@ -830,15 +832,15 @@ int cmd_inject(int argc, const char **argv)
830 return -1; 832 return -1;
831 } 833 }
832 834
833 if (perf_data_file__open(&inject.output)) { 835 if (perf_data__open(&inject.output)) {
834 perror("failed to create output file"); 836 perror("failed to create output file");
835 return -1; 837 return -1;
836 } 838 }
837 839
838 inject.tool.ordered_events = inject.sched_stat; 840 inject.tool.ordered_events = inject.sched_stat;
839 841
840 file.path = inject.input_name; 842 data.file.path = inject.input_name;
841 inject.session = perf_session__new(&file, true, &inject.tool); 843 inject.session = perf_session__new(&data, true, &inject.tool);
842 if (inject.session == NULL) 844 if (inject.session == NULL)
843 return -1; 845 return -1;
844 846
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 35d4b9c9a9e8..557d391f564a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1894,7 +1894,7 @@ int cmd_kmem(int argc, const char **argv)
1894{ 1894{
1895 const char * const default_slab_sort = "frag,hit,bytes"; 1895 const char * const default_slab_sort = "frag,hit,bytes";
1896 const char * const default_page_sort = "bytes,hit"; 1896 const char * const default_page_sort = "bytes,hit";
1897 struct perf_data_file file = { 1897 struct perf_data data = {
1898 .mode = PERF_DATA_MODE_READ, 1898 .mode = PERF_DATA_MODE_READ,
1899 }; 1899 };
1900 const struct option kmem_options[] = { 1900 const struct option kmem_options[] = {
@@ -1910,7 +1910,7 @@ int cmd_kmem(int argc, const char **argv)
1910 "page, order, migtype, gfp", parse_sort_opt), 1910 "page, order, migtype, gfp", parse_sort_opt),
1911 OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), 1911 OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
1912 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), 1912 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
1913 OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), 1913 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
1914 OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator", 1914 OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
1915 parse_slab_opt), 1915 parse_slab_opt),
1916 OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator", 1916 OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
@@ -1950,9 +1950,9 @@ int cmd_kmem(int argc, const char **argv)
1950 return __cmd_record(argc, argv); 1950 return __cmd_record(argc, argv);
1951 } 1951 }
1952 1952
1953 file.path = input_name; 1953 data.file.path = input_name;
1954 1954
1955 kmem_session = session = perf_session__new(&file, false, &perf_kmem); 1955 kmem_session = session = perf_session__new(&data, false, &perf_kmem);
1956 if (session == NULL) 1956 if (session == NULL)
1957 return -1; 1957 return -1;
1958 1958
@@ -1984,7 +1984,8 @@ int cmd_kmem(int argc, const char **argv)
1984 1984
1985 if (perf_time__parse_str(&ptime, time_str) != 0) { 1985 if (perf_time__parse_str(&ptime, time_str) != 0) {
1986 pr_err("Invalid time string\n"); 1986 pr_err("Invalid time string\n");
1987 return -EINVAL; 1987 ret = -EINVAL;
1988 goto out_delete;
1988 } 1989 }
1989 1990
1990 if (!strcmp(argv[0], "stat")) { 1991 if (!strcmp(argv[0], "stat")) {
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 5fb40368d5d1..0c36f2ac6a0e 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -35,7 +35,6 @@
35#include <termios.h> 35#include <termios.h>
36#include <semaphore.h> 36#include <semaphore.h>
37#include <signal.h> 37#include <signal.h>
38#include <pthread.h>
39#include <math.h> 38#include <math.h>
40 39
41static const char *get_filename_for_perf_kvm(void) 40static const char *get_filename_for_perf_kvm(void)
@@ -1069,10 +1068,12 @@ static int read_events(struct perf_kvm_stat *kvm)
1069 .namespaces = perf_event__process_namespaces, 1068 .namespaces = perf_event__process_namespaces,
1070 .ordered_events = true, 1069 .ordered_events = true,
1071 }; 1070 };
1072 struct perf_data_file file = { 1071 struct perf_data file = {
1073 .path = kvm->file_name, 1072 .file = {
1074 .mode = PERF_DATA_MODE_READ, 1073 .path = kvm->file_name,
1075 .force = kvm->force, 1074 },
1075 .mode = PERF_DATA_MODE_READ,
1076 .force = kvm->force,
1076 }; 1077 };
1077 1078
1078 kvm->tool = eops; 1079 kvm->tool = eops;
@@ -1360,7 +1361,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
1360 "perf kvm stat live [<options>]", 1361 "perf kvm stat live [<options>]",
1361 NULL 1362 NULL
1362 }; 1363 };
1363 struct perf_data_file file = { 1364 struct perf_data data = {
1364 .mode = PERF_DATA_MODE_WRITE, 1365 .mode = PERF_DATA_MODE_WRITE,
1365 }; 1366 };
1366 1367
@@ -1434,7 +1435,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
1434 /* 1435 /*
1435 * perf session 1436 * perf session
1436 */ 1437 */
1437 kvm->session = perf_session__new(&file, false, &kvm->tool); 1438 kvm->session = perf_session__new(&data, false, &kvm->tool);
1438 if (kvm->session == NULL) { 1439 if (kvm->session == NULL) {
1439 err = -1; 1440 err = -1;
1440 goto out; 1441 goto out;
@@ -1443,7 +1444,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
1443 perf_session__set_id_hdr_size(kvm->session); 1444 perf_session__set_id_hdr_size(kvm->session);
1444 ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true); 1445 ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
1445 machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target, 1446 machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
1446 kvm->evlist->threads, false, kvm->opts.proc_map_timeout); 1447 kvm->evlist->threads, false,
1448 kvm->opts.proc_map_timeout, 1);
1447 err = kvm_live_open_events(kvm); 1449 err = kvm_live_open_events(kvm);
1448 if (err) 1450 if (err)
1449 goto out; 1451 goto out;
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index eeedbe433776..ead221e49f00 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -16,6 +16,7 @@
16#include "util/cache.h" 16#include "util/cache.h"
17#include "util/pmu.h" 17#include "util/pmu.h"
18#include "util/debug.h" 18#include "util/debug.h"
19#include "util/metricgroup.h"
19#include <subcmd/parse-options.h> 20#include <subcmd/parse-options.h>
20 21
21static bool desc_flag = true; 22static bool desc_flag = true;
@@ -80,6 +81,10 @@ int cmd_list(int argc, const char **argv)
80 long_desc_flag, details_flag); 81 long_desc_flag, details_flag);
81 else if (strcmp(argv[i], "sdt") == 0) 82 else if (strcmp(argv[i], "sdt") == 0)
82 print_sdt_events(NULL, NULL, raw_dump); 83 print_sdt_events(NULL, NULL, raw_dump);
84 else if (strcmp(argv[i], "metric") == 0)
85 metricgroup__print(true, false, NULL, raw_dump);
86 else if (strcmp(argv[i], "metricgroup") == 0)
87 metricgroup__print(false, true, NULL, raw_dump);
83 else if ((sep = strchr(argv[i], ':')) != NULL) { 88 else if ((sep = strchr(argv[i], ':')) != NULL) {
84 int sep_idx; 89 int sep_idx;
85 90
@@ -97,6 +102,7 @@ int cmd_list(int argc, const char **argv)
97 s[sep_idx] = '\0'; 102 s[sep_idx] = '\0';
98 print_tracepoint_events(s, s + sep_idx + 1, raw_dump); 103 print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
99 print_sdt_events(s, s + sep_idx + 1, raw_dump); 104 print_sdt_events(s, s + sep_idx + 1, raw_dump);
105 metricgroup__print(true, true, s, raw_dump);
100 free(s); 106 free(s);
101 } else { 107 } else {
102 if (asprintf(&s, "*%s*", argv[i]) < 0) { 108 if (asprintf(&s, "*%s*", argv[i]) < 0) {
@@ -113,6 +119,7 @@ int cmd_list(int argc, const char **argv)
113 details_flag); 119 details_flag);
114 print_tracepoint_events(NULL, s, raw_dump); 120 print_tracepoint_events(NULL, s, raw_dump);
115 print_sdt_events(NULL, s, raw_dump); 121 print_sdt_events(NULL, s, raw_dump);
122 metricgroup__print(true, true, NULL, raw_dump);
116 free(s); 123 free(s);
117 } 124 }
118 } 125 }
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index fe69cd6b89e1..6e0189df2b3b 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -865,13 +865,15 @@ static int __cmd_report(bool display_info)
865 .namespaces = perf_event__process_namespaces, 865 .namespaces = perf_event__process_namespaces,
866 .ordered_events = true, 866 .ordered_events = true,
867 }; 867 };
868 struct perf_data_file file = { 868 struct perf_data data = {
869 .path = input_name, 869 .file = {
870 .mode = PERF_DATA_MODE_READ, 870 .path = input_name,
871 .force = force, 871 },
872 .mode = PERF_DATA_MODE_READ,
873 .force = force,
872 }; 874 };
873 875
874 session = perf_session__new(&file, false, &eops); 876 session = perf_session__new(&data, false, &eops);
875 if (!session) { 877 if (!session) {
876 pr_err("Initializing perf session failed\n"); 878 pr_err("Initializing perf session failed\n");
877 return -1; 879 return -1;
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 4db960085273..506564651cda 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -113,6 +113,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
113 if (!perf_mem_events[j].supported) { 113 if (!perf_mem_events[j].supported) {
114 pr_err("failed: event '%s' not supported\n", 114 pr_err("failed: event '%s' not supported\n",
115 perf_mem_events__name(j)); 115 perf_mem_events__name(j));
116 free(rec_argv);
116 return -1; 117 return -1;
117 } 118 }
118 119
@@ -236,13 +237,15 @@ static int process_sample_event(struct perf_tool *tool,
236 237
237static int report_raw_events(struct perf_mem *mem) 238static int report_raw_events(struct perf_mem *mem)
238{ 239{
239 struct perf_data_file file = { 240 struct perf_data data = {
240 .path = input_name, 241 .file = {
241 .mode = PERF_DATA_MODE_READ, 242 .path = input_name,
242 .force = mem->force, 243 },
244 .mode = PERF_DATA_MODE_READ,
245 .force = mem->force,
243 }; 246 };
244 int ret; 247 int ret;
245 struct perf_session *session = perf_session__new(&file, false, 248 struct perf_session *session = perf_session__new(&data, false,
246 &mem->tool); 249 &mem->tool);
247 250
248 if (session == NULL) 251 if (session == NULL)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0c95ffefb6cc..3d7f33e19df2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -67,7 +67,7 @@ struct record {
67 struct perf_tool tool; 67 struct perf_tool tool;
68 struct record_opts opts; 68 struct record_opts opts;
69 u64 bytes_written; 69 u64 bytes_written;
70 struct perf_data_file file; 70 struct perf_data data;
71 struct auxtrace_record *itr; 71 struct auxtrace_record *itr;
72 struct perf_evlist *evlist; 72 struct perf_evlist *evlist;
73 struct perf_session *session; 73 struct perf_session *session;
@@ -108,7 +108,7 @@ static bool switch_output_time(struct record *rec)
108 108
109static int record__write(struct record *rec, void *bf, size_t size) 109static int record__write(struct record *rec, void *bf, size_t size)
110{ 110{
111 if (perf_data_file__write(rec->session->file, bf, size) < 0) { 111 if (perf_data__write(rec->session->data, bf, size) < 0) {
112 pr_err("failed to write perf data, error: %m\n"); 112 pr_err("failed to write perf data, error: %m\n");
113 return -1; 113 return -1;
114 } 114 }
@@ -130,107 +130,12 @@ static int process_synthesized_event(struct perf_tool *tool,
130 return record__write(rec, event, event->header.size); 130 return record__write(rec, event, event->header.size);
131} 131}
132 132
133static int 133static int record__pushfn(void *to, void *bf, size_t size)
134backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
135{ 134{
136 struct perf_event_header *pheader; 135 struct record *rec = to;
137 u64 evt_head = head;
138 int size = mask + 1;
139
140 pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
141 pheader = (struct perf_event_header *)(buf + (head & mask));
142 *start = head;
143 while (true) {
144 if (evt_head - head >= (unsigned int)size) {
145 pr_debug("Finished reading backward ring buffer: rewind\n");
146 if (evt_head - head > (unsigned int)size)
147 evt_head -= pheader->size;
148 *end = evt_head;
149 return 0;
150 }
151
152 pheader = (struct perf_event_header *)(buf + (evt_head & mask));
153
154 if (pheader->size == 0) {
155 pr_debug("Finished reading backward ring buffer: get start\n");
156 *end = evt_head;
157 return 0;
158 }
159
160 evt_head += pheader->size;
161 pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
162 }
163 WARN_ONCE(1, "Shouldn't get here\n");
164 return -1;
165}
166
167static int
168rb_find_range(void *data, int mask, u64 head, u64 old,
169 u64 *start, u64 *end, bool backward)
170{
171 if (!backward) {
172 *start = old;
173 *end = head;
174 return 0;
175 }
176
177 return backward_rb_find_range(data, mask, head, start, end);
178}
179
180static int
181record__mmap_read(struct record *rec, struct perf_mmap *md,
182 bool overwrite, bool backward)
183{
184 u64 head = perf_mmap__read_head(md);
185 u64 old = md->prev;
186 u64 end = head, start = old;
187 unsigned char *data = md->base + page_size;
188 unsigned long size;
189 void *buf;
190 int rc = 0;
191
192 if (rb_find_range(data, md->mask, head,
193 old, &start, &end, backward))
194 return -1;
195
196 if (start == end)
197 return 0;
198 136
199 rec->samples++; 137 rec->samples++;
200 138 return record__write(rec, bf, size);
201 size = end - start;
202 if (size > (unsigned long)(md->mask) + 1) {
203 WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
204
205 md->prev = head;
206 perf_mmap__consume(md, overwrite || backward);
207 return 0;
208 }
209
210 if ((start & md->mask) + size != (end & md->mask)) {
211 buf = &data[start & md->mask];
212 size = md->mask + 1 - (start & md->mask);
213 start += size;
214
215 if (record__write(rec, buf, size) < 0) {
216 rc = -1;
217 goto out;
218 }
219 }
220
221 buf = &data[start & md->mask];
222 size = end - start;
223 start += size;
224
225 if (record__write(rec, buf, size) < 0) {
226 rc = -1;
227 goto out;
228 }
229
230 md->prev = head;
231 perf_mmap__consume(md, overwrite || backward);
232out:
233 return rc;
234} 139}
235 140
236static volatile int done; 141static volatile int done;
@@ -269,13 +174,13 @@ static int record__process_auxtrace(struct perf_tool *tool,
269 size_t len1, void *data2, size_t len2) 174 size_t len1, void *data2, size_t len2)
270{ 175{
271 struct record *rec = container_of(tool, struct record, tool); 176 struct record *rec = container_of(tool, struct record, tool);
272 struct perf_data_file *file = &rec->file; 177 struct perf_data *data = &rec->data;
273 size_t padding; 178 size_t padding;
274 u8 pad[8] = {0}; 179 u8 pad[8] = {0};
275 180
276 if (!perf_data_file__is_pipe(file)) { 181 if (!perf_data__is_pipe(data)) {
277 off_t file_offset; 182 off_t file_offset;
278 int fd = perf_data_file__fd(file); 183 int fd = perf_data__fd(data);
279 int err; 184 int err;
280 185
281 file_offset = lseek(fd, 0, SEEK_CUR); 186 file_offset = lseek(fd, 0, SEEK_CUR);
@@ -494,10 +399,10 @@ static int process_sample_event(struct perf_tool *tool,
494 399
495static int process_buildids(struct record *rec) 400static int process_buildids(struct record *rec)
496{ 401{
497 struct perf_data_file *file = &rec->file; 402 struct perf_data *data = &rec->data;
498 struct perf_session *session = rec->session; 403 struct perf_session *session = rec->session;
499 404
500 if (file->size == 0) 405 if (data->size == 0)
501 return 0; 406 return 0;
502 407
503 /* 408 /*
@@ -577,8 +482,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
577 struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; 482 struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
578 483
579 if (maps[i].base) { 484 if (maps[i].base) {
580 if (record__mmap_read(rec, &maps[i], 485 if (perf_mmap__push(&maps[i], evlist->overwrite, backward, rec, record__pushfn) != 0) {
581 evlist->overwrite, backward) != 0) {
582 rc = -1; 486 rc = -1;
583 goto out; 487 goto out;
584 } 488 }
@@ -641,14 +545,14 @@ static void record__init_features(struct record *rec)
641static void 545static void
642record__finish_output(struct record *rec) 546record__finish_output(struct record *rec)
643{ 547{
644 struct perf_data_file *file = &rec->file; 548 struct perf_data *data = &rec->data;
645 int fd = perf_data_file__fd(file); 549 int fd = perf_data__fd(data);
646 550
647 if (file->is_pipe) 551 if (data->is_pipe)
648 return; 552 return;
649 553
650 rec->session->header.data_size += rec->bytes_written; 554 rec->session->header.data_size += rec->bytes_written;
651 file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); 555 data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);
652 556
653 if (!rec->no_buildid) { 557 if (!rec->no_buildid) {
654 process_buildids(rec); 558 process_buildids(rec);
@@ -687,7 +591,7 @@ static int record__synthesize(struct record *rec, bool tail);
687static int 591static int
688record__switch_output(struct record *rec, bool at_exit) 592record__switch_output(struct record *rec, bool at_exit)
689{ 593{
690 struct perf_data_file *file = &rec->file; 594 struct perf_data *data = &rec->data;
691 int fd, err; 595 int fd, err;
692 596
693 /* Same Size: "2015122520103046"*/ 597 /* Same Size: "2015122520103046"*/
@@ -705,7 +609,7 @@ record__switch_output(struct record *rec, bool at_exit)
705 return -EINVAL; 609 return -EINVAL;
706 } 610 }
707 611
708 fd = perf_data_file__switch(file, timestamp, 612 fd = perf_data__switch(data, timestamp,
709 rec->session->header.data_offset, 613 rec->session->header.data_offset,
710 at_exit); 614 at_exit);
711 if (fd >= 0 && !at_exit) { 615 if (fd >= 0 && !at_exit) {
@@ -715,7 +619,7 @@ record__switch_output(struct record *rec, bool at_exit)
715 619
716 if (!quiet) 620 if (!quiet)
717 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 621 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
718 file->path, timestamp); 622 data->file.path, timestamp);
719 623
720 /* Output tracking events */ 624 /* Output tracking events */
721 if (!at_exit) { 625 if (!at_exit) {
@@ -790,16 +694,16 @@ static int record__synthesize(struct record *rec, bool tail)
790{ 694{
791 struct perf_session *session = rec->session; 695 struct perf_session *session = rec->session;
792 struct machine *machine = &session->machines.host; 696 struct machine *machine = &session->machines.host;
793 struct perf_data_file *file = &rec->file; 697 struct perf_data *data = &rec->data;
794 struct record_opts *opts = &rec->opts; 698 struct record_opts *opts = &rec->opts;
795 struct perf_tool *tool = &rec->tool; 699 struct perf_tool *tool = &rec->tool;
796 int fd = perf_data_file__fd(file); 700 int fd = perf_data__fd(data);
797 int err = 0; 701 int err = 0;
798 702
799 if (rec->opts.tail_synthesize != tail) 703 if (rec->opts.tail_synthesize != tail)
800 return 0; 704 return 0;
801 705
802 if (file->is_pipe) { 706 if (data->is_pipe) {
803 err = perf_event__synthesize_features( 707 err = perf_event__synthesize_features(
804 tool, session, rec->evlist, process_synthesized_event); 708 tool, session, rec->evlist, process_synthesized_event);
805 if (err < 0) { 709 if (err < 0) {
@@ -864,7 +768,7 @@ static int record__synthesize(struct record *rec, bool tail)
864 768
865 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 769 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
866 process_synthesized_event, opts->sample_address, 770 process_synthesized_event, opts->sample_address,
867 opts->proc_map_timeout); 771 opts->proc_map_timeout, 1);
868out: 772out:
869 return err; 773 return err;
870} 774}
@@ -878,7 +782,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
878 struct machine *machine; 782 struct machine *machine;
879 struct perf_tool *tool = &rec->tool; 783 struct perf_tool *tool = &rec->tool;
880 struct record_opts *opts = &rec->opts; 784 struct record_opts *opts = &rec->opts;
881 struct perf_data_file *file = &rec->file; 785 struct perf_data *data = &rec->data;
882 struct perf_session *session; 786 struct perf_session *session;
883 bool disabled = false, draining = false; 787 bool disabled = false, draining = false;
884 int fd; 788 int fd;
@@ -904,20 +808,20 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
904 signal(SIGUSR2, SIG_IGN); 808 signal(SIGUSR2, SIG_IGN);
905 } 809 }
906 810
907 session = perf_session__new(file, false, tool); 811 session = perf_session__new(data, false, tool);
908 if (session == NULL) { 812 if (session == NULL) {
909 pr_err("Perf session creation failed.\n"); 813 pr_err("Perf session creation failed.\n");
910 return -1; 814 return -1;
911 } 815 }
912 816
913 fd = perf_data_file__fd(file); 817 fd = perf_data__fd(data);
914 rec->session = session; 818 rec->session = session;
915 819
916 record__init_features(rec); 820 record__init_features(rec);
917 821
918 if (forks) { 822 if (forks) {
919 err = perf_evlist__prepare_workload(rec->evlist, &opts->target, 823 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
920 argv, file->is_pipe, 824 argv, data->is_pipe,
921 workload_exec_failed_signal); 825 workload_exec_failed_signal);
922 if (err < 0) { 826 if (err < 0) {
923 pr_err("Couldn't run the workload!\n"); 827 pr_err("Couldn't run the workload!\n");
@@ -953,7 +857,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
953 if (!rec->evlist->nr_groups) 857 if (!rec->evlist->nr_groups)
954 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 858 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
955 859
956 if (file->is_pipe) { 860 if (data->is_pipe) {
957 err = perf_header__write_pipe(fd); 861 err = perf_header__write_pipe(fd);
958 if (err < 0) 862 if (err < 0)
959 goto out_child; 863 goto out_child;
@@ -1214,8 +1118,8 @@ out_child:
1214 samples[0] = '\0'; 1118 samples[0] = '\0';
1215 1119
1216 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", 1120 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1217 perf_data_file__size(file) / 1024.0 / 1024.0, 1121 perf_data__size(data) / 1024.0 / 1024.0,
1218 file->path, postfix, samples); 1122 data->file.path, postfix, samples);
1219 } 1123 }
1220 1124
1221out_delete_session: 1125out_delete_session:
@@ -1579,7 +1483,7 @@ static struct option __record_options[] = {
1579 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 1483 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1580 "list of cpus to monitor"), 1484 "list of cpus to monitor"),
1581 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 1485 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1582 OPT_STRING('o', "output", &record.file.path, "file", 1486 OPT_STRING('o', "output", &record.data.file.path, "file",
1583 "output file name"), 1487 "output file name"),
1584 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 1488 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1585 &record.opts.no_inherit_set, 1489 &record.opts.no_inherit_set,
@@ -1644,6 +1548,9 @@ static struct option __record_options[] = {
1644 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 1548 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1645 "sample selected machine registers on interrupt," 1549 "sample selected machine registers on interrupt,"
1646 " use -I ? to list register names", parse_regs), 1550 " use -I ? to list register names", parse_regs),
1551 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1552 "sample selected machine registers on interrupt,"
1553 " use -I ? to list register names", parse_regs),
1647 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 1554 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1648 "Record running/enabled time of read (:S) events"), 1555 "Record running/enabled time of read (:S) events"),
1649 OPT_CALLBACK('k', "clockid", &record.opts, 1556 OPT_CALLBACK('k', "clockid", &record.opts,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index fae4b0340750..1394cd8d96f7 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -258,7 +258,7 @@ static int report__setup_sample_type(struct report *rep)
258{ 258{
259 struct perf_session *session = rep->session; 259 struct perf_session *session = rep->session;
260 u64 sample_type = perf_evlist__combined_sample_type(session->evlist); 260 u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
261 bool is_pipe = perf_data_file__is_pipe(session->file); 261 bool is_pipe = perf_data__is_pipe(session->data);
262 262
263 if (session->itrace_synth_opts->callchain || 263 if (session->itrace_synth_opts->callchain ||
264 (!is_pipe && 264 (!is_pipe &&
@@ -569,7 +569,7 @@ static int __cmd_report(struct report *rep)
569 int ret; 569 int ret;
570 struct perf_session *session = rep->session; 570 struct perf_session *session = rep->session;
571 struct perf_evsel *pos; 571 struct perf_evsel *pos;
572 struct perf_data_file *file = session->file; 572 struct perf_data *data = session->data;
573 573
574 signal(SIGINT, sig_handler); 574 signal(SIGINT, sig_handler);
575 575
@@ -638,7 +638,7 @@ static int __cmd_report(struct report *rep)
638 rep->nr_entries += evsel__hists(pos)->nr_entries; 638 rep->nr_entries += evsel__hists(pos)->nr_entries;
639 639
640 if (rep->nr_entries == 0) { 640 if (rep->nr_entries == 0) {
641 ui__error("The %s file has no samples!\n", file->path); 641 ui__error("The %s file has no samples!\n", data->file.path);
642 return 0; 642 return 0;
643 } 643 }
644 644
@@ -880,7 +880,7 @@ int cmd_report(int argc, const char **argv)
880 "Show inline function"), 880 "Show inline function"),
881 OPT_END() 881 OPT_END()
882 }; 882 };
883 struct perf_data_file file = { 883 struct perf_data data = {
884 .mode = PERF_DATA_MODE_READ, 884 .mode = PERF_DATA_MODE_READ,
885 }; 885 };
886 int ret = hists__init(); 886 int ret = hists__init();
@@ -941,11 +941,11 @@ int cmd_report(int argc, const char **argv)
941 input_name = "perf.data"; 941 input_name = "perf.data";
942 } 942 }
943 943
944 file.path = input_name; 944 data.file.path = input_name;
945 file.force = symbol_conf.force; 945 data.force = symbol_conf.force;
946 946
947repeat: 947repeat:
948 session = perf_session__new(&file, false, &report.tool); 948 session = perf_session__new(&data, false, &report.tool);
949 if (session == NULL) 949 if (session == NULL)
950 return -1; 950 return -1;
951 951
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index f380d91ee609..83283fedb00f 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1701,14 +1701,16 @@ static int perf_sched__read_events(struct perf_sched *sched)
1701 { "sched:sched_migrate_task", process_sched_migrate_task_event, }, 1701 { "sched:sched_migrate_task", process_sched_migrate_task_event, },
1702 }; 1702 };
1703 struct perf_session *session; 1703 struct perf_session *session;
1704 struct perf_data_file file = { 1704 struct perf_data data = {
1705 .path = input_name, 1705 .file = {
1706 .mode = PERF_DATA_MODE_READ, 1706 .path = input_name,
1707 .force = sched->force, 1707 },
1708 .mode = PERF_DATA_MODE_READ,
1709 .force = sched->force,
1708 }; 1710 };
1709 int rc = -1; 1711 int rc = -1;
1710 1712
1711 session = perf_session__new(&file, false, &sched->tool); 1713 session = perf_session__new(&data, false, &sched->tool);
1712 if (session == NULL) { 1714 if (session == NULL) {
1713 pr_debug("No Memory for session\n"); 1715 pr_debug("No Memory for session\n");
1714 return -1; 1716 return -1;
@@ -2903,10 +2905,12 @@ static int perf_sched__timehist(struct perf_sched *sched)
2903 const struct perf_evsel_str_handler migrate_handlers[] = { 2905 const struct perf_evsel_str_handler migrate_handlers[] = {
2904 { "sched:sched_migrate_task", timehist_migrate_task_event, }, 2906 { "sched:sched_migrate_task", timehist_migrate_task_event, },
2905 }; 2907 };
2906 struct perf_data_file file = { 2908 struct perf_data data = {
2907 .path = input_name, 2909 .file = {
2908 .mode = PERF_DATA_MODE_READ, 2910 .path = input_name,
2909 .force = sched->force, 2911 },
2912 .mode = PERF_DATA_MODE_READ,
2913 .force = sched->force,
2910 }; 2914 };
2911 2915
2912 struct perf_session *session; 2916 struct perf_session *session;
@@ -2931,7 +2935,7 @@ static int perf_sched__timehist(struct perf_sched *sched)
2931 2935
2932 symbol_conf.use_callchain = sched->show_callchain; 2936 symbol_conf.use_callchain = sched->show_callchain;
2933 2937
2934 session = perf_session__new(&file, false, &sched->tool); 2938 session = perf_session__new(&data, false, &sched->tool);
2935 if (session == NULL) 2939 if (session == NULL)
2936 return -ENOMEM; 2940 return -ENOMEM;
2937 2941
@@ -3364,6 +3368,10 @@ int cmd_sched(int argc, const char **argv)
3364 OPT_STRING(0, "time", &sched.time_str, "str", 3368 OPT_STRING(0, "time", &sched.time_str, "str",
3365 "Time span for analysis (start,stop)"), 3369 "Time span for analysis (start,stop)"),
3366 OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"), 3370 OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"),
3371 OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
3372 "analyze events only for given process id(s)"),
3373 OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
3374 "analyze events only for given thread id(s)"),
3367 OPT_PARENT(sched_options) 3375 OPT_PARENT(sched_options)
3368 }; 3376 };
3369 3377
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 0fe02758de7d..68f36dc0344f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -89,6 +89,7 @@ enum perf_output_field {
89 PERF_OUTPUT_BRSTACKOFF = 1U << 24, 89 PERF_OUTPUT_BRSTACKOFF = 1U << 24,
90 PERF_OUTPUT_SYNTH = 1U << 25, 90 PERF_OUTPUT_SYNTH = 1U << 25,
91 PERF_OUTPUT_PHYS_ADDR = 1U << 26, 91 PERF_OUTPUT_PHYS_ADDR = 1U << 26,
92 PERF_OUTPUT_UREGS = 1U << 27,
92}; 93};
93 94
94struct output_option { 95struct output_option {
@@ -110,6 +111,7 @@ struct output_option {
110 {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, 111 {.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
111 {.str = "period", .field = PERF_OUTPUT_PERIOD}, 112 {.str = "period", .field = PERF_OUTPUT_PERIOD},
112 {.str = "iregs", .field = PERF_OUTPUT_IREGS}, 113 {.str = "iregs", .field = PERF_OUTPUT_IREGS},
114 {.str = "uregs", .field = PERF_OUTPUT_UREGS},
113 {.str = "brstack", .field = PERF_OUTPUT_BRSTACK}, 115 {.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
114 {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM}, 116 {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
115 {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, 117 {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
@@ -209,6 +211,51 @@ static struct {
209 }, 211 },
210}; 212};
211 213
214struct perf_evsel_script {
215 char *filename;
216 FILE *fp;
217 u64 samples;
218};
219
220static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel,
221 struct perf_data *data)
222{
223 struct perf_evsel_script *es = malloc(sizeof(*es));
224
225 if (es != NULL) {
226 if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
227 goto out_free;
228 es->fp = fopen(es->filename, "w");
229 if (es->fp == NULL)
230 goto out_free_filename;
231 es->samples = 0;
232 }
233
234 return es;
235out_free_filename:
236 zfree(&es->filename);
237out_free:
238 free(es);
239 return NULL;
240}
241
242static void perf_evsel_script__delete(struct perf_evsel_script *es)
243{
244 zfree(&es->filename);
245 fclose(es->fp);
246 es->fp = NULL;
247 free(es);
248}
249
250static int perf_evsel_script__fprintf(struct perf_evsel_script *es, FILE *fp)
251{
252 struct stat st;
253
254 fstat(fileno(es->fp), &st);
255 return fprintf(fp, "[ perf script: Wrote %.3f MB %s (%" PRIu64 " samples) ]\n",
256 st.st_size / 1024.0 / 1024.0, es->filename, es->samples);
257}
258
212static inline int output_type(unsigned int type) 259static inline int output_type(unsigned int type)
213{ 260{
214 switch (type) { 261 switch (type) {
@@ -386,6 +433,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
386 PERF_OUTPUT_IREGS)) 433 PERF_OUTPUT_IREGS))
387 return -EINVAL; 434 return -EINVAL;
388 435
436 if (PRINT_FIELD(UREGS) &&
437 perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS",
438 PERF_OUTPUT_UREGS))
439 return -EINVAL;
440
389 if (PRINT_FIELD(PHYS_ADDR) && 441 if (PRINT_FIELD(PHYS_ADDR) &&
390 perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", 442 perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
391 PERF_OUTPUT_PHYS_ADDR)) 443 PERF_OUTPUT_PHYS_ADDR))
@@ -494,51 +546,76 @@ out:
494 return 0; 546 return 0;
495} 547}
496 548
497static void print_sample_iregs(struct perf_sample *sample, 549static int perf_sample__fprintf_iregs(struct perf_sample *sample,
498 struct perf_event_attr *attr) 550 struct perf_event_attr *attr, FILE *fp)
499{ 551{
500 struct regs_dump *regs = &sample->intr_regs; 552 struct regs_dump *regs = &sample->intr_regs;
501 uint64_t mask = attr->sample_regs_intr; 553 uint64_t mask = attr->sample_regs_intr;
502 unsigned i = 0, r; 554 unsigned i = 0, r;
555 int printed = 0;
503 556
504 if (!regs) 557 if (!regs)
505 return; 558 return 0;
506 559
507 for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { 560 for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
508 u64 val = regs->regs[i++]; 561 u64 val = regs->regs[i++];
509 printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); 562 printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
510 } 563 }
564
565 return printed;
511} 566}
512 567
513static void print_sample_start(struct perf_sample *sample, 568static int perf_sample__fprintf_uregs(struct perf_sample *sample,
514 struct thread *thread, 569 struct perf_event_attr *attr, FILE *fp)
515 struct perf_evsel *evsel) 570{
571 struct regs_dump *regs = &sample->user_regs;
572 uint64_t mask = attr->sample_regs_user;
573 unsigned i = 0, r;
574 int printed = 0;
575
576 if (!regs || !regs->regs)
577 return 0;
578
579 printed += fprintf(fp, " ABI:%" PRIu64 " ", regs->abi);
580
581 for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
582 u64 val = regs->regs[i++];
583 printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
584 }
585
586 return printed;
587}
588
589static int perf_sample__fprintf_start(struct perf_sample *sample,
590 struct thread *thread,
591 struct perf_evsel *evsel, FILE *fp)
516{ 592{
517 struct perf_event_attr *attr = &evsel->attr; 593 struct perf_event_attr *attr = &evsel->attr;
518 unsigned long secs; 594 unsigned long secs;
519 unsigned long long nsecs; 595 unsigned long long nsecs;
596 int printed = 0;
520 597
521 if (PRINT_FIELD(COMM)) { 598 if (PRINT_FIELD(COMM)) {
522 if (latency_format) 599 if (latency_format)
523 printf("%8.8s ", thread__comm_str(thread)); 600 printed += fprintf(fp, "%8.8s ", thread__comm_str(thread));
524 else if (PRINT_FIELD(IP) && symbol_conf.use_callchain) 601 else if (PRINT_FIELD(IP) && symbol_conf.use_callchain)
525 printf("%s ", thread__comm_str(thread)); 602 printed += fprintf(fp, "%s ", thread__comm_str(thread));
526 else 603 else
527 printf("%16s ", thread__comm_str(thread)); 604 printed += fprintf(fp, "%16s ", thread__comm_str(thread));
528 } 605 }
529 606
530 if (PRINT_FIELD(PID) && PRINT_FIELD(TID)) 607 if (PRINT_FIELD(PID) && PRINT_FIELD(TID))
531 printf("%5d/%-5d ", sample->pid, sample->tid); 608 printed += fprintf(fp, "%5d/%-5d ", sample->pid, sample->tid);
532 else if (PRINT_FIELD(PID)) 609 else if (PRINT_FIELD(PID))
533 printf("%5d ", sample->pid); 610 printed += fprintf(fp, "%5d ", sample->pid);
534 else if (PRINT_FIELD(TID)) 611 else if (PRINT_FIELD(TID))
535 printf("%5d ", sample->tid); 612 printed += fprintf(fp, "%5d ", sample->tid);
536 613
537 if (PRINT_FIELD(CPU)) { 614 if (PRINT_FIELD(CPU)) {
538 if (latency_format) 615 if (latency_format)
539 printf("%3d ", sample->cpu); 616 printed += fprintf(fp, "%3d ", sample->cpu);
540 else 617 else
541 printf("[%03d] ", sample->cpu); 618 printed += fprintf(fp, "[%03d] ", sample->cpu);
542 } 619 }
543 620
544 if (PRINT_FIELD(TIME)) { 621 if (PRINT_FIELD(TIME)) {
@@ -547,13 +624,15 @@ static void print_sample_start(struct perf_sample *sample,
547 nsecs -= secs * NSEC_PER_SEC; 624 nsecs -= secs * NSEC_PER_SEC;
548 625
549 if (nanosecs) 626 if (nanosecs)
550 printf("%5lu.%09llu: ", secs, nsecs); 627 printed += fprintf(fp, "%5lu.%09llu: ", secs, nsecs);
551 else { 628 else {
552 char sample_time[32]; 629 char sample_time[32];
553 timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time)); 630 timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time));
554 printf("%12s: ", sample_time); 631 printed += fprintf(fp, "%12s: ", sample_time);
555 } 632 }
556 } 633 }
634
635 return printed;
557} 636}
558 637
559static inline char 638static inline char
@@ -565,16 +644,17 @@ mispred_str(struct branch_entry *br)
565 return br->flags.predicted ? 'P' : 'M'; 644 return br->flags.predicted ? 'P' : 'M';
566} 645}
567 646
568static void print_sample_brstack(struct perf_sample *sample, 647static int perf_sample__fprintf_brstack(struct perf_sample *sample,
569 struct thread *thread, 648 struct thread *thread,
570 struct perf_event_attr *attr) 649 struct perf_event_attr *attr, FILE *fp)
571{ 650{
572 struct branch_stack *br = sample->branch_stack; 651 struct branch_stack *br = sample->branch_stack;
573 struct addr_location alf, alt; 652 struct addr_location alf, alt;
574 u64 i, from, to; 653 u64 i, from, to;
654 int printed = 0;
575 655
576 if (!(br && br->nr)) 656 if (!(br && br->nr))
577 return; 657 return 0;
578 658
579 for (i = 0; i < br->nr; i++) { 659 for (i = 0; i < br->nr; i++) {
580 from = br->entries[i].from; 660 from = br->entries[i].from;
@@ -587,38 +667,41 @@ static void print_sample_brstack(struct perf_sample *sample,
587 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); 667 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
588 } 668 }
589 669
590 printf(" 0x%"PRIx64, from); 670 printed += fprintf(fp, " 0x%"PRIx64, from);
591 if (PRINT_FIELD(DSO)) { 671 if (PRINT_FIELD(DSO)) {
592 printf("("); 672 printed += fprintf(fp, "(");
593 map__fprintf_dsoname(alf.map, stdout); 673 printed += map__fprintf_dsoname(alf.map, fp);
594 printf(")"); 674 printed += fprintf(fp, ")");
595 } 675 }
596 676
597 printf("/0x%"PRIx64, to); 677 printed += fprintf(fp, "/0x%"PRIx64, to);
598 if (PRINT_FIELD(DSO)) { 678 if (PRINT_FIELD(DSO)) {
599 printf("("); 679 printed += fprintf(fp, "(");
600 map__fprintf_dsoname(alt.map, stdout); 680 printed += map__fprintf_dsoname(alt.map, fp);
601 printf(")"); 681 printed += fprintf(fp, ")");
602 } 682 }
603 683
604 printf("/%c/%c/%c/%d ", 684 printed += fprintf(fp, "/%c/%c/%c/%d ",
605 mispred_str( br->entries + i), 685 mispred_str( br->entries + i),
606 br->entries[i].flags.in_tx? 'X' : '-', 686 br->entries[i].flags.in_tx? 'X' : '-',
607 br->entries[i].flags.abort? 'A' : '-', 687 br->entries[i].flags.abort? 'A' : '-',
608 br->entries[i].flags.cycles); 688 br->entries[i].flags.cycles);
609 } 689 }
690
691 return printed;
610} 692}
611 693
612static void print_sample_brstacksym(struct perf_sample *sample, 694static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
613 struct thread *thread, 695 struct thread *thread,
614 struct perf_event_attr *attr) 696 struct perf_event_attr *attr, FILE *fp)
615{ 697{
616 struct branch_stack *br = sample->branch_stack; 698 struct branch_stack *br = sample->branch_stack;
617 struct addr_location alf, alt; 699 struct addr_location alf, alt;
618 u64 i, from, to; 700 u64 i, from, to;
701 int printed = 0;
619 702
620 if (!(br && br->nr)) 703 if (!(br && br->nr))
621 return; 704 return 0;
622 705
623 for (i = 0; i < br->nr; i++) { 706 for (i = 0; i < br->nr; i++) {
624 707
@@ -635,37 +718,40 @@ static void print_sample_brstacksym(struct perf_sample *sample,
635 if (alt.map) 718 if (alt.map)
636 alt.sym = map__find_symbol(alt.map, alt.addr); 719 alt.sym = map__find_symbol(alt.map, alt.addr);
637 720
638 symbol__fprintf_symname_offs(alf.sym, &alf, stdout); 721 printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp);
639 if (PRINT_FIELD(DSO)) { 722 if (PRINT_FIELD(DSO)) {
640 printf("("); 723 printed += fprintf(fp, "(");
641 map__fprintf_dsoname(alf.map, stdout); 724 printed += map__fprintf_dsoname(alf.map, fp);
642 printf(")"); 725 printed += fprintf(fp, ")");
643 } 726 }
644 putchar('/'); 727 printed += fprintf(fp, "%c", '/');
645 symbol__fprintf_symname_offs(alt.sym, &alt, stdout); 728 printed += symbol__fprintf_symname_offs(alt.sym, &alt, fp);
646 if (PRINT_FIELD(DSO)) { 729 if (PRINT_FIELD(DSO)) {
647 printf("("); 730 printed += fprintf(fp, "(");
648 map__fprintf_dsoname(alt.map, stdout); 731 printed += map__fprintf_dsoname(alt.map, fp);
649 printf(")"); 732 printed += fprintf(fp, ")");
650 } 733 }
651 printf("/%c/%c/%c/%d ", 734 printed += fprintf(fp, "/%c/%c/%c/%d ",
652 mispred_str( br->entries + i), 735 mispred_str( br->entries + i),
653 br->entries[i].flags.in_tx? 'X' : '-', 736 br->entries[i].flags.in_tx? 'X' : '-',
654 br->entries[i].flags.abort? 'A' : '-', 737 br->entries[i].flags.abort? 'A' : '-',
655 br->entries[i].flags.cycles); 738 br->entries[i].flags.cycles);
656 } 739 }
740
741 return printed;
657} 742}
658 743
659static void print_sample_brstackoff(struct perf_sample *sample, 744static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
660 struct thread *thread, 745 struct thread *thread,
661 struct perf_event_attr *attr) 746 struct perf_event_attr *attr, FILE *fp)
662{ 747{
663 struct branch_stack *br = sample->branch_stack; 748 struct branch_stack *br = sample->branch_stack;
664 struct addr_location alf, alt; 749 struct addr_location alf, alt;
665 u64 i, from, to; 750 u64 i, from, to;
751 int printed = 0;
666 752
667 if (!(br && br->nr)) 753 if (!(br && br->nr))
668 return; 754 return 0;
669 755
670 for (i = 0; i < br->nr; i++) { 756 for (i = 0; i < br->nr; i++) {
671 757
@@ -682,24 +768,26 @@ static void print_sample_brstackoff(struct perf_sample *sample,
682 if (alt.map && !alt.map->dso->adjust_symbols) 768 if (alt.map && !alt.map->dso->adjust_symbols)
683 to = map__map_ip(alt.map, to); 769 to = map__map_ip(alt.map, to);
684 770
685 printf(" 0x%"PRIx64, from); 771 printed += fprintf(fp, " 0x%"PRIx64, from);
686 if (PRINT_FIELD(DSO)) { 772 if (PRINT_FIELD(DSO)) {
687 printf("("); 773 printed += fprintf(fp, "(");
688 map__fprintf_dsoname(alf.map, stdout); 774 printed += map__fprintf_dsoname(alf.map, fp);
689 printf(")"); 775 printed += fprintf(fp, ")");
690 } 776 }
691 printf("/0x%"PRIx64, to); 777 printed += fprintf(fp, "/0x%"PRIx64, to);
692 if (PRINT_FIELD(DSO)) { 778 if (PRINT_FIELD(DSO)) {
693 printf("("); 779 printed += fprintf(fp, "(");
694 map__fprintf_dsoname(alt.map, stdout); 780 printed += map__fprintf_dsoname(alt.map, fp);
695 printf(")"); 781 printed += fprintf(fp, ")");
696 } 782 }
697 printf("/%c/%c/%c/%d ", 783 printed += fprintf(fp, "/%c/%c/%c/%d ",
698 mispred_str(br->entries + i), 784 mispred_str(br->entries + i),
699 br->entries[i].flags.in_tx ? 'X' : '-', 785 br->entries[i].flags.in_tx ? 'X' : '-',
700 br->entries[i].flags.abort ? 'A' : '-', 786 br->entries[i].flags.abort ? 'A' : '-',
701 br->entries[i].flags.cycles); 787 br->entries[i].flags.cycles);
702 } 788 }
789
790 return printed;
703} 791}
704#define MAXBB 16384UL 792#define MAXBB 16384UL
705 793
@@ -727,27 +815,26 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
727 * but the exit is not. Let the caller patch it up. 815 * but the exit is not. Let the caller patch it up.
728 */ 816 */
729 if (kernel != machine__kernel_ip(machine, end)) { 817 if (kernel != machine__kernel_ip(machine, end)) {
730 printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", 818 pr_debug("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", start, end);
731 start, end);
732 return -ENXIO; 819 return -ENXIO;
733 } 820 }
734 821
735 memset(&al, 0, sizeof(al)); 822 memset(&al, 0, sizeof(al));
736 if (end - start > MAXBB - MAXINSN) { 823 if (end - start > MAXBB - MAXINSN) {
737 if (last) 824 if (last)
738 printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end); 825 pr_debug("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end);
739 else 826 else
740 printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start); 827 pr_debug("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start);
741 return 0; 828 return 0;
742 } 829 }
743 830
744 thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); 831 thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al);
745 if (!al.map || !al.map->dso) { 832 if (!al.map || !al.map->dso) {
746 printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); 833 pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
747 return 0; 834 return 0;
748 } 835 }
749 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) { 836 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) {
750 printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); 837 pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
751 return 0; 838 return 0;
752 } 839 }
753 840
@@ -760,36 +847,35 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
760 847
761 *is64bit = al.map->dso->is_64_bit; 848 *is64bit = al.map->dso->is_64_bit;
762 if (len <= 0) 849 if (len <= 0)
763 printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n", 850 pr_debug("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n",
764 start, end); 851 start, end);
765 return len; 852 return len;
766} 853}
767 854
768static void print_jump(uint64_t ip, struct branch_entry *en, 855static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
769 struct perf_insn *x, u8 *inbuf, int len, 856 struct perf_insn *x, u8 *inbuf, int len,
770 int insn) 857 int insn, FILE *fp)
771{ 858{
772 printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", 859 int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
773 ip, 860 dump_insn(x, ip, inbuf, len, NULL),
774 dump_insn(x, ip, inbuf, len, NULL), 861 en->flags.predicted ? " PRED" : "",
775 en->flags.predicted ? " PRED" : "", 862 en->flags.mispred ? " MISPRED" : "",
776 en->flags.mispred ? " MISPRED" : "", 863 en->flags.in_tx ? " INTX" : "",
777 en->flags.in_tx ? " INTX" : "", 864 en->flags.abort ? " ABORT" : "");
778 en->flags.abort ? " ABORT" : "");
779 if (en->flags.cycles) { 865 if (en->flags.cycles) {
780 printf(" %d cycles", en->flags.cycles); 866 printed += fprintf(fp, " %d cycles", en->flags.cycles);
781 if (insn) 867 if (insn)
782 printf(" %.2f IPC", (float)insn / en->flags.cycles); 868 printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
783 } 869 }
784 putchar('\n'); 870 return printed + fprintf(fp, "\n");
785} 871}
786 872
787static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu, 873static int ip__fprintf_sym(uint64_t addr, struct thread *thread,
788 uint64_t addr, struct symbol **lastsym, 874 u8 cpumode, int cpu, struct symbol **lastsym,
789 struct perf_event_attr *attr) 875 struct perf_event_attr *attr, FILE *fp)
790{ 876{
791 struct addr_location al; 877 struct addr_location al;
792 int off; 878 int off, printed = 0;
793 879
794 memset(&al, 0, sizeof(al)); 880 memset(&al, 0, sizeof(al));
795 881
@@ -798,7 +884,7 @@ static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu,
798 thread__find_addr_map(thread, cpumode, MAP__VARIABLE, 884 thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
799 addr, &al); 885 addr, &al);
800 if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) 886 if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
801 return; 887 return 0;
802 888
803 al.cpu = cpu; 889 al.cpu = cpu;
804 al.sym = NULL; 890 al.sym = NULL;
@@ -806,37 +892,39 @@ static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu,
806 al.sym = map__find_symbol(al.map, al.addr); 892 al.sym = map__find_symbol(al.map, al.addr);
807 893
808 if (!al.sym) 894 if (!al.sym)
809 return; 895 return 0;
810 896
811 if (al.addr < al.sym->end) 897 if (al.addr < al.sym->end)
812 off = al.addr - al.sym->start; 898 off = al.addr - al.sym->start;
813 else 899 else
814 off = al.addr - al.map->start - al.sym->start; 900 off = al.addr - al.map->start - al.sym->start;
815 printf("\t%s", al.sym->name); 901 printed += fprintf(fp, "\t%s", al.sym->name);
816 if (off) 902 if (off)
817 printf("%+d", off); 903 printed += fprintf(fp, "%+d", off);
818 putchar(':'); 904 printed += fprintf(fp, ":");
819 if (PRINT_FIELD(SRCLINE)) 905 if (PRINT_FIELD(SRCLINE))
820 map__fprintf_srcline(al.map, al.addr, "\t", stdout); 906 printed += map__fprintf_srcline(al.map, al.addr, "\t", fp);
821 putchar('\n'); 907 printed += fprintf(fp, "\n");
822 *lastsym = al.sym; 908 *lastsym = al.sym;
909
910 return printed;
823} 911}
824 912
825static void print_sample_brstackinsn(struct perf_sample *sample, 913static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
826 struct thread *thread, 914 struct thread *thread,
827 struct perf_event_attr *attr, 915 struct perf_event_attr *attr,
828 struct machine *machine) 916 struct machine *machine, FILE *fp)
829{ 917{
830 struct branch_stack *br = sample->branch_stack; 918 struct branch_stack *br = sample->branch_stack;
831 u64 start, end; 919 u64 start, end;
832 int i, insn, len, nr, ilen; 920 int i, insn, len, nr, ilen, printed = 0;
833 struct perf_insn x; 921 struct perf_insn x;
834 u8 buffer[MAXBB]; 922 u8 buffer[MAXBB];
835 unsigned off; 923 unsigned off;
836 struct symbol *lastsym = NULL; 924 struct symbol *lastsym = NULL;
837 925
838 if (!(br && br->nr)) 926 if (!(br && br->nr))
839 return; 927 return 0;
840 nr = br->nr; 928 nr = br->nr;
841 if (max_blocks && nr > max_blocks + 1) 929 if (max_blocks && nr > max_blocks + 1)
842 nr = max_blocks + 1; 930 nr = max_blocks + 1;
@@ -844,17 +932,17 @@ static void print_sample_brstackinsn(struct perf_sample *sample,
844 x.thread = thread; 932 x.thread = thread;
845 x.cpu = sample->cpu; 933 x.cpu = sample->cpu;
846 934
847 putchar('\n'); 935 printed += fprintf(fp, "%c", '\n');
848 936
849 /* Handle first from jump, of which we don't know the entry. */ 937 /* Handle first from jump, of which we don't know the entry. */
850 len = grab_bb(buffer, br->entries[nr-1].from, 938 len = grab_bb(buffer, br->entries[nr-1].from,
851 br->entries[nr-1].from, 939 br->entries[nr-1].from,
852 machine, thread, &x.is64bit, &x.cpumode, false); 940 machine, thread, &x.is64bit, &x.cpumode, false);
853 if (len > 0) { 941 if (len > 0) {
854 print_ip_sym(thread, x.cpumode, x.cpu, 942 printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
855 br->entries[nr - 1].from, &lastsym, attr); 943 x.cpumode, x.cpu, &lastsym, attr, fp);
856 print_jump(br->entries[nr - 1].from, &br->entries[nr - 1], 944 printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
857 &x, buffer, len, 0); 945 &x, buffer, len, 0, fp);
858 } 946 }
859 947
860 /* Print all blocks */ 948 /* Print all blocks */
@@ -880,13 +968,13 @@ static void print_sample_brstackinsn(struct perf_sample *sample,
880 for (off = 0;; off += ilen) { 968 for (off = 0;; off += ilen) {
881 uint64_t ip = start + off; 969 uint64_t ip = start + off;
882 970
883 print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr); 971 printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
884 if (ip == end) { 972 if (ip == end) {
885 print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn); 973 printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
886 break; 974 break;
887 } else { 975 } else {
888 printf("\t%016" PRIx64 "\t%s\n", ip, 976 printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
889 dump_insn(&x, ip, buffer + off, len - off, &ilen)); 977 dump_insn(&x, ip, buffer + off, len - off, &ilen));
890 if (ilen == 0) 978 if (ilen == 0)
891 break; 979 break;
892 insn++; 980 insn++;
@@ -899,9 +987,9 @@ static void print_sample_brstackinsn(struct perf_sample *sample,
899 * has not been executed yet. 987 * has not been executed yet.
900 */ 988 */
901 if (br->entries[0].from == sample->ip) 989 if (br->entries[0].from == sample->ip)
902 return; 990 goto out;
903 if (br->entries[0].flags.abort) 991 if (br->entries[0].flags.abort)
904 return; 992 goto out;
905 993
906 /* 994 /*
907 * Print final block upto sample 995 * Print final block upto sample
@@ -909,58 +997,61 @@ static void print_sample_brstackinsn(struct perf_sample *sample,
909 start = br->entries[0].to; 997 start = br->entries[0].to;
910 end = sample->ip; 998 end = sample->ip;
911 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true); 999 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
912 print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr); 1000 printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
913 if (len <= 0) { 1001 if (len <= 0) {
914 /* Print at least last IP if basic block did not work */ 1002 /* Print at least last IP if basic block did not work */
915 len = grab_bb(buffer, sample->ip, sample->ip, 1003 len = grab_bb(buffer, sample->ip, sample->ip,
916 machine, thread, &x.is64bit, &x.cpumode, false); 1004 machine, thread, &x.is64bit, &x.cpumode, false);
917 if (len <= 0) 1005 if (len <= 0)
918 return; 1006 goto out;
919 1007
920 printf("\t%016" PRIx64 "\t%s\n", sample->ip, 1008 printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
921 dump_insn(&x, sample->ip, buffer, len, NULL)); 1009 dump_insn(&x, sample->ip, buffer, len, NULL));
922 return; 1010 goto out;
923 } 1011 }
924 for (off = 0; off <= end - start; off += ilen) { 1012 for (off = 0; off <= end - start; off += ilen) {
925 printf("\t%016" PRIx64 "\t%s\n", start + off, 1013 printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", start + off,
926 dump_insn(&x, start + off, buffer + off, len - off, &ilen)); 1014 dump_insn(&x, start + off, buffer + off, len - off, &ilen));
927 if (ilen == 0) 1015 if (ilen == 0)
928 break; 1016 break;
929 } 1017 }
1018out:
1019 return printed;
930} 1020}
931 1021
932static void print_sample_addr(struct perf_sample *sample, 1022static int perf_sample__fprintf_addr(struct perf_sample *sample,
933 struct thread *thread, 1023 struct thread *thread,
934 struct perf_event_attr *attr) 1024 struct perf_event_attr *attr, FILE *fp)
935{ 1025{
936 struct addr_location al; 1026 struct addr_location al;
937 1027 int printed = fprintf(fp, "%16" PRIx64, sample->addr);
938 printf("%16" PRIx64, sample->addr);
939 1028
940 if (!sample_addr_correlates_sym(attr)) 1029 if (!sample_addr_correlates_sym(attr))
941 return; 1030 goto out;
942 1031
943 thread__resolve(thread, &al, sample); 1032 thread__resolve(thread, &al, sample);
944 1033
945 if (PRINT_FIELD(SYM)) { 1034 if (PRINT_FIELD(SYM)) {
946 printf(" "); 1035 printed += fprintf(fp, " ");
947 if (PRINT_FIELD(SYMOFFSET)) 1036 if (PRINT_FIELD(SYMOFFSET))
948 symbol__fprintf_symname_offs(al.sym, &al, stdout); 1037 printed += symbol__fprintf_symname_offs(al.sym, &al, fp);
949 else 1038 else
950 symbol__fprintf_symname(al.sym, stdout); 1039 printed += symbol__fprintf_symname(al.sym, fp);
951 } 1040 }
952 1041
953 if (PRINT_FIELD(DSO)) { 1042 if (PRINT_FIELD(DSO)) {
954 printf(" ("); 1043 printed += fprintf(fp, " (");
955 map__fprintf_dsoname(al.map, stdout); 1044 printed += map__fprintf_dsoname(al.map, fp);
956 printf(")"); 1045 printed += fprintf(fp, ")");
957 } 1046 }
1047out:
1048 return printed;
958} 1049}
959 1050
960static void print_sample_callindent(struct perf_sample *sample, 1051static int perf_sample__fprintf_callindent(struct perf_sample *sample,
961 struct perf_evsel *evsel, 1052 struct perf_evsel *evsel,
962 struct thread *thread, 1053 struct thread *thread,
963 struct addr_location *al) 1054 struct addr_location *al, FILE *fp)
964{ 1055{
965 struct perf_event_attr *attr = &evsel->attr; 1056 struct perf_event_attr *attr = &evsel->attr;
966 size_t depth = thread_stack__depth(thread); 1057 size_t depth = thread_stack__depth(thread);
@@ -995,12 +1086,12 @@ static void print_sample_callindent(struct perf_sample *sample,
995 } 1086 }
996 1087
997 if (name) 1088 if (name)
998 len = printf("%*s%s", (int)depth * 4, "", name); 1089 len = fprintf(fp, "%*s%s", (int)depth * 4, "", name);
999 else if (ip) 1090 else if (ip)
1000 len = printf("%*s%16" PRIx64, (int)depth * 4, "", ip); 1091 len = fprintf(fp, "%*s%16" PRIx64, (int)depth * 4, "", ip);
1001 1092
1002 if (len < 0) 1093 if (len < 0)
1003 return; 1094 return len;
1004 1095
1005 /* 1096 /*
1006 * Try to keep the output length from changing frequently so that the 1097 * Try to keep the output length from changing frequently so that the
@@ -1010,39 +1101,46 @@ static void print_sample_callindent(struct perf_sample *sample,
1010 spacing = round_up(len + 4, 32); 1101 spacing = round_up(len + 4, 32);
1011 1102
1012 if (len < spacing) 1103 if (len < spacing)
1013 printf("%*s", spacing - len, ""); 1104 len += fprintf(fp, "%*s", spacing - len, "");
1105
1106 return len;
1014} 1107}
1015 1108
1016static void print_insn(struct perf_sample *sample, 1109static int perf_sample__fprintf_insn(struct perf_sample *sample,
1017 struct perf_event_attr *attr, 1110 struct perf_event_attr *attr,
1018 struct thread *thread, 1111 struct thread *thread,
1019 struct machine *machine) 1112 struct machine *machine, FILE *fp)
1020{ 1113{
1114 int printed = 0;
1115
1021 if (PRINT_FIELD(INSNLEN)) 1116 if (PRINT_FIELD(INSNLEN))
1022 printf(" ilen: %d", sample->insn_len); 1117 printed += fprintf(fp, " ilen: %d", sample->insn_len);
1023 if (PRINT_FIELD(INSN)) { 1118 if (PRINT_FIELD(INSN)) {
1024 int i; 1119 int i;
1025 1120
1026 printf(" insn:"); 1121 printed += fprintf(fp, " insn:");
1027 for (i = 0; i < sample->insn_len; i++) 1122 for (i = 0; i < sample->insn_len; i++)
1028 printf(" %02x", (unsigned char)sample->insn[i]); 1123 printed += fprintf(fp, " %02x", (unsigned char)sample->insn[i]);
1029 } 1124 }
1030 if (PRINT_FIELD(BRSTACKINSN)) 1125 if (PRINT_FIELD(BRSTACKINSN))
1031 print_sample_brstackinsn(sample, thread, attr, machine); 1126 printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
1127
1128 return printed;
1032} 1129}
1033 1130
1034static void print_sample_bts(struct perf_sample *sample, 1131static int perf_sample__fprintf_bts(struct perf_sample *sample,
1035 struct perf_evsel *evsel, 1132 struct perf_evsel *evsel,
1036 struct thread *thread, 1133 struct thread *thread,
1037 struct addr_location *al, 1134 struct addr_location *al,
1038 struct machine *machine) 1135 struct machine *machine, FILE *fp)
1039{ 1136{
1040 struct perf_event_attr *attr = &evsel->attr; 1137 struct perf_event_attr *attr = &evsel->attr;
1041 unsigned int type = output_type(attr->type); 1138 unsigned int type = output_type(attr->type);
1042 bool print_srcline_last = false; 1139 bool print_srcline_last = false;
1140 int printed = 0;
1043 1141
1044 if (PRINT_FIELD(CALLINDENT)) 1142 if (PRINT_FIELD(CALLINDENT))
1045 print_sample_callindent(sample, evsel, thread, al); 1143 printed += perf_sample__fprintf_callindent(sample, evsel, thread, al, fp);
1046 1144
1047 /* print branch_from information */ 1145 /* print branch_from information */
1048 if (PRINT_FIELD(IP)) { 1146 if (PRINT_FIELD(IP)) {
@@ -1055,31 +1153,30 @@ static void print_sample_bts(struct perf_sample *sample,
1055 cursor = &callchain_cursor; 1153 cursor = &callchain_cursor;
1056 1154
1057 if (cursor == NULL) { 1155 if (cursor == NULL) {
1058 putchar(' '); 1156 printed += fprintf(fp, " ");
1059 if (print_opts & EVSEL__PRINT_SRCLINE) { 1157 if (print_opts & EVSEL__PRINT_SRCLINE) {
1060 print_srcline_last = true; 1158 print_srcline_last = true;
1061 print_opts &= ~EVSEL__PRINT_SRCLINE; 1159 print_opts &= ~EVSEL__PRINT_SRCLINE;
1062 } 1160 }
1063 } else 1161 } else
1064 putchar('\n'); 1162 printed += fprintf(fp, "\n");
1065 1163
1066 sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout); 1164 printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor, fp);
1067 } 1165 }
1068 1166
1069 /* print branch_to information */ 1167 /* print branch_to information */
1070 if (PRINT_FIELD(ADDR) || 1168 if (PRINT_FIELD(ADDR) ||
1071 ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && 1169 ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
1072 !output[type].user_set)) { 1170 !output[type].user_set)) {
1073 printf(" => "); 1171 printed += fprintf(fp, " => ");
1074 print_sample_addr(sample, thread, attr); 1172 printed += perf_sample__fprintf_addr(sample, thread, attr, fp);
1075 } 1173 }
1076 1174
1077 if (print_srcline_last) 1175 if (print_srcline_last)
1078 map__fprintf_srcline(al->map, al->addr, "\n ", stdout); 1176 printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
1079
1080 print_insn(sample, attr, thread, machine);
1081 1177
1082 printf("\n"); 1178 printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
1179 return printed + fprintf(fp, "\n");
1083} 1180}
1084 1181
1085static struct { 1182static struct {
@@ -1102,7 +1199,7 @@ static struct {
1102 {0, NULL} 1199 {0, NULL}
1103}; 1200};
1104 1201
1105static void print_sample_flags(u32 flags) 1202static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
1106{ 1203{
1107 const char *chars = PERF_IP_FLAG_CHARS; 1204 const char *chars = PERF_IP_FLAG_CHARS;
1108 const int n = strlen(PERF_IP_FLAG_CHARS); 1205 const int n = strlen(PERF_IP_FLAG_CHARS);
@@ -1129,9 +1226,9 @@ static void print_sample_flags(u32 flags)
1129 str[pos] = 0; 1226 str[pos] = 0;
1130 1227
1131 if (name) 1228 if (name)
1132 printf(" %-7s%4s ", name, in_tx ? "(x)" : ""); 1229 return fprintf(fp, " %-7s%4s ", name, in_tx ? "(x)" : "");
1133 else 1230
1134 printf(" %-11s ", str); 1231 return fprintf(fp, " %-11s ", str);
1135} 1232}
1136 1233
1137struct printer_data { 1234struct printer_data {
@@ -1140,40 +1237,40 @@ struct printer_data {
1140 bool is_printable; 1237 bool is_printable;
1141}; 1238};
1142 1239
1143static void 1240static int sample__fprintf_bpf_output(enum binary_printer_ops op,
1144print_sample_bpf_output_printer(enum binary_printer_ops op, 1241 unsigned int val,
1145 unsigned int val, 1242 void *extra, FILE *fp)
1146 void *extra)
1147{ 1243{
1148 unsigned char ch = (unsigned char)val; 1244 unsigned char ch = (unsigned char)val;
1149 struct printer_data *printer_data = extra; 1245 struct printer_data *printer_data = extra;
1246 int printed = 0;
1150 1247
1151 switch (op) { 1248 switch (op) {
1152 case BINARY_PRINT_DATA_BEGIN: 1249 case BINARY_PRINT_DATA_BEGIN:
1153 printf("\n"); 1250 printed += fprintf(fp, "\n");
1154 break; 1251 break;
1155 case BINARY_PRINT_LINE_BEGIN: 1252 case BINARY_PRINT_LINE_BEGIN:
1156 printf("%17s", !printer_data->line_no ? "BPF output:" : 1253 printed += fprintf(fp, "%17s", !printer_data->line_no ? "BPF output:" :
1157 " "); 1254 " ");
1158 break; 1255 break;
1159 case BINARY_PRINT_ADDR: 1256 case BINARY_PRINT_ADDR:
1160 printf(" %04x:", val); 1257 printed += fprintf(fp, " %04x:", val);
1161 break; 1258 break;
1162 case BINARY_PRINT_NUM_DATA: 1259 case BINARY_PRINT_NUM_DATA:
1163 printf(" %02x", val); 1260 printed += fprintf(fp, " %02x", val);
1164 break; 1261 break;
1165 case BINARY_PRINT_NUM_PAD: 1262 case BINARY_PRINT_NUM_PAD:
1166 printf(" "); 1263 printed += fprintf(fp, " ");
1167 break; 1264 break;
1168 case BINARY_PRINT_SEP: 1265 case BINARY_PRINT_SEP:
1169 printf(" "); 1266 printed += fprintf(fp, " ");
1170 break; 1267 break;
1171 case BINARY_PRINT_CHAR_DATA: 1268 case BINARY_PRINT_CHAR_DATA:
1172 if (printer_data->hit_nul && ch) 1269 if (printer_data->hit_nul && ch)
1173 printer_data->is_printable = false; 1270 printer_data->is_printable = false;
1174 1271
1175 if (!isprint(ch)) { 1272 if (!isprint(ch)) {
1176 printf("%c", '.'); 1273 printed += fprintf(fp, "%c", '.');
1177 1274
1178 if (!printer_data->is_printable) 1275 if (!printer_data->is_printable)
1179 break; 1276 break;
@@ -1183,154 +1280,154 @@ print_sample_bpf_output_printer(enum binary_printer_ops op,
1183 else 1280 else
1184 printer_data->is_printable = false; 1281 printer_data->is_printable = false;
1185 } else { 1282 } else {
1186 printf("%c", ch); 1283 printed += fprintf(fp, "%c", ch);
1187 } 1284 }
1188 break; 1285 break;
1189 case BINARY_PRINT_CHAR_PAD: 1286 case BINARY_PRINT_CHAR_PAD:
1190 printf(" "); 1287 printed += fprintf(fp, " ");
1191 break; 1288 break;
1192 case BINARY_PRINT_LINE_END: 1289 case BINARY_PRINT_LINE_END:
1193 printf("\n"); 1290 printed += fprintf(fp, "\n");
1194 printer_data->line_no++; 1291 printer_data->line_no++;
1195 break; 1292 break;
1196 case BINARY_PRINT_DATA_END: 1293 case BINARY_PRINT_DATA_END:
1197 default: 1294 default:
1198 break; 1295 break;
1199 } 1296 }
1297
1298 return printed;
1200} 1299}
1201 1300
1202static void print_sample_bpf_output(struct perf_sample *sample) 1301static int perf_sample__fprintf_bpf_output(struct perf_sample *sample, FILE *fp)
1203{ 1302{
1204 unsigned int nr_bytes = sample->raw_size; 1303 unsigned int nr_bytes = sample->raw_size;
1205 struct printer_data printer_data = {0, false, true}; 1304 struct printer_data printer_data = {0, false, true};
1206 1305 int printed = binary__fprintf(sample->raw_data, nr_bytes, 8,
1207 print_binary(sample->raw_data, nr_bytes, 8, 1306 sample__fprintf_bpf_output, &printer_data, fp);
1208 print_sample_bpf_output_printer, &printer_data);
1209 1307
1210 if (printer_data.is_printable && printer_data.hit_nul) 1308 if (printer_data.is_printable && printer_data.hit_nul)
1211 printf("%17s \"%s\"\n", "BPF string:", 1309 printed += fprintf(fp, "%17s \"%s\"\n", "BPF string:", (char *)(sample->raw_data));
1212 (char *)(sample->raw_data)); 1310
1311 return printed;
1213} 1312}
1214 1313
1215static void print_sample_spacing(int len, int spacing) 1314static int perf_sample__fprintf_spacing(int len, int spacing, FILE *fp)
1216{ 1315{
1217 if (len > 0 && len < spacing) 1316 if (len > 0 && len < spacing)
1218 printf("%*s", spacing - len, ""); 1317 return fprintf(fp, "%*s", spacing - len, "");
1318
1319 return 0;
1219} 1320}
1220 1321
1221static void print_sample_pt_spacing(int len) 1322static int perf_sample__fprintf_pt_spacing(int len, FILE *fp)
1222{ 1323{
1223 print_sample_spacing(len, 34); 1324 return perf_sample__fprintf_spacing(len, 34, fp);
1224} 1325}
1225 1326
1226static void print_sample_synth_ptwrite(struct perf_sample *sample) 1327static int perf_sample__fprintf_synth_ptwrite(struct perf_sample *sample, FILE *fp)
1227{ 1328{
1228 struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample); 1329 struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample);
1229 int len; 1330 int len;
1230 1331
1231 if (perf_sample__bad_synth_size(sample, *data)) 1332 if (perf_sample__bad_synth_size(sample, *data))
1232 return; 1333 return 0;
1233 1334
1234 len = printf(" IP: %u payload: %#" PRIx64 " ", 1335 len = fprintf(fp, " IP: %u payload: %#" PRIx64 " ",
1235 data->ip, le64_to_cpu(data->payload)); 1336 data->ip, le64_to_cpu(data->payload));
1236 print_sample_pt_spacing(len); 1337 return len + perf_sample__fprintf_pt_spacing(len, fp);
1237} 1338}
1238 1339
1239static void print_sample_synth_mwait(struct perf_sample *sample) 1340static int perf_sample__fprintf_synth_mwait(struct perf_sample *sample, FILE *fp)
1240{ 1341{
1241 struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample); 1342 struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample);
1242 int len; 1343 int len;
1243 1344
1244 if (perf_sample__bad_synth_size(sample, *data)) 1345 if (perf_sample__bad_synth_size(sample, *data))
1245 return; 1346 return 0;
1246 1347
1247 len = printf(" hints: %#x extensions: %#x ", 1348 len = fprintf(fp, " hints: %#x extensions: %#x ",
1248 data->hints, data->extensions); 1349 data->hints, data->extensions);
1249 print_sample_pt_spacing(len); 1350 return len + perf_sample__fprintf_pt_spacing(len, fp);
1250} 1351}
1251 1352
1252static void print_sample_synth_pwre(struct perf_sample *sample) 1353static int perf_sample__fprintf_synth_pwre(struct perf_sample *sample, FILE *fp)
1253{ 1354{
1254 struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample); 1355 struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample);
1255 int len; 1356 int len;
1256 1357
1257 if (perf_sample__bad_synth_size(sample, *data)) 1358 if (perf_sample__bad_synth_size(sample, *data))
1258 return; 1359 return 0;
1259 1360
1260 len = printf(" hw: %u cstate: %u sub-cstate: %u ", 1361 len = fprintf(fp, " hw: %u cstate: %u sub-cstate: %u ",
1261 data->hw, data->cstate, data->subcstate); 1362 data->hw, data->cstate, data->subcstate);
1262 print_sample_pt_spacing(len); 1363 return len + perf_sample__fprintf_pt_spacing(len, fp);
1263} 1364}
1264 1365
1265static void print_sample_synth_exstop(struct perf_sample *sample) 1366static int perf_sample__fprintf_synth_exstop(struct perf_sample *sample, FILE *fp)
1266{ 1367{
1267 struct perf_synth_intel_exstop *data = perf_sample__synth_ptr(sample); 1368 struct perf_synth_intel_exstop *data = perf_sample__synth_ptr(sample);
1268 int len; 1369 int len;
1269 1370
1270 if (perf_sample__bad_synth_size(sample, *data)) 1371 if (perf_sample__bad_synth_size(sample, *data))
1271 return; 1372 return 0;
1272 1373
1273 len = printf(" IP: %u ", data->ip); 1374 len = fprintf(fp, " IP: %u ", data->ip);
1274 print_sample_pt_spacing(len); 1375 return len + perf_sample__fprintf_pt_spacing(len, fp);
1275} 1376}
1276 1377
1277static void print_sample_synth_pwrx(struct perf_sample *sample) 1378static int perf_sample__fprintf_synth_pwrx(struct perf_sample *sample, FILE *fp)
1278{ 1379{
1279 struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample); 1380 struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample);
1280 int len; 1381 int len;
1281 1382
1282 if (perf_sample__bad_synth_size(sample, *data)) 1383 if (perf_sample__bad_synth_size(sample, *data))
1283 return; 1384 return 0;
1284 1385
1285 len = printf(" deepest cstate: %u last cstate: %u wake reason: %#x ", 1386 len = fprintf(fp, " deepest cstate: %u last cstate: %u wake reason: %#x ",
1286 data->deepest_cstate, data->last_cstate, 1387 data->deepest_cstate, data->last_cstate,
1287 data->wake_reason); 1388 data->wake_reason);
1288 print_sample_pt_spacing(len); 1389 return len + perf_sample__fprintf_pt_spacing(len, fp);
1289} 1390}
1290 1391
1291static void print_sample_synth_cbr(struct perf_sample *sample) 1392static int perf_sample__fprintf_synth_cbr(struct perf_sample *sample, FILE *fp)
1292{ 1393{
1293 struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample); 1394 struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample);
1294 unsigned int percent, freq; 1395 unsigned int percent, freq;
1295 int len; 1396 int len;
1296 1397
1297 if (perf_sample__bad_synth_size(sample, *data)) 1398 if (perf_sample__bad_synth_size(sample, *data))
1298 return; 1399 return 0;
1299 1400
1300 freq = (le32_to_cpu(data->freq) + 500) / 1000; 1401 freq = (le32_to_cpu(data->freq) + 500) / 1000;
1301 len = printf(" cbr: %2u freq: %4u MHz ", data->cbr, freq); 1402 len = fprintf(fp, " cbr: %2u freq: %4u MHz ", data->cbr, freq);
1302 if (data->max_nonturbo) { 1403 if (data->max_nonturbo) {
1303 percent = (5 + (1000 * data->cbr) / data->max_nonturbo) / 10; 1404 percent = (5 + (1000 * data->cbr) / data->max_nonturbo) / 10;
1304 len += printf("(%3u%%) ", percent); 1405 len += fprintf(fp, "(%3u%%) ", percent);
1305 } 1406 }
1306 print_sample_pt_spacing(len); 1407 return len + perf_sample__fprintf_pt_spacing(len, fp);
1307} 1408}
1308 1409
1309static void print_sample_synth(struct perf_sample *sample, 1410static int perf_sample__fprintf_synth(struct perf_sample *sample,
1310 struct perf_evsel *evsel) 1411 struct perf_evsel *evsel, FILE *fp)
1311{ 1412{
1312 switch (evsel->attr.config) { 1413 switch (evsel->attr.config) {
1313 case PERF_SYNTH_INTEL_PTWRITE: 1414 case PERF_SYNTH_INTEL_PTWRITE:
1314 print_sample_synth_ptwrite(sample); 1415 return perf_sample__fprintf_synth_ptwrite(sample, fp);
1315 break;
1316 case PERF_SYNTH_INTEL_MWAIT: 1416 case PERF_SYNTH_INTEL_MWAIT:
1317 print_sample_synth_mwait(sample); 1417 return perf_sample__fprintf_synth_mwait(sample, fp);
1318 break;
1319 case PERF_SYNTH_INTEL_PWRE: 1418 case PERF_SYNTH_INTEL_PWRE:
1320 print_sample_synth_pwre(sample); 1419 return perf_sample__fprintf_synth_pwre(sample, fp);
1321 break;
1322 case PERF_SYNTH_INTEL_EXSTOP: 1420 case PERF_SYNTH_INTEL_EXSTOP:
1323 print_sample_synth_exstop(sample); 1421 return perf_sample__fprintf_synth_exstop(sample, fp);
1324 break;
1325 case PERF_SYNTH_INTEL_PWRX: 1422 case PERF_SYNTH_INTEL_PWRX:
1326 print_sample_synth_pwrx(sample); 1423 return perf_sample__fprintf_synth_pwrx(sample, fp);
1327 break;
1328 case PERF_SYNTH_INTEL_CBR: 1424 case PERF_SYNTH_INTEL_CBR:
1329 print_sample_synth_cbr(sample); 1425 return perf_sample__fprintf_synth_cbr(sample, fp);
1330 break;
1331 default: 1426 default:
1332 break; 1427 break;
1333 } 1428 }
1429
1430 return 0;
1334} 1431}
1335 1432
1336struct perf_script { 1433struct perf_script {
@@ -1341,6 +1438,7 @@ struct perf_script {
1341 bool show_switch_events; 1438 bool show_switch_events;
1342 bool show_namespace_events; 1439 bool show_namespace_events;
1343 bool allocated; 1440 bool allocated;
1441 bool per_event_dump;
1344 struct cpu_map *cpus; 1442 struct cpu_map *cpus;
1345 struct thread_map *threads; 1443 struct thread_map *threads;
1346 int name_width; 1444 int name_width;
@@ -1362,7 +1460,7 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist)
1362 return max; 1460 return max;
1363} 1461}
1364 1462
1365static size_t data_src__printf(u64 data_src) 1463static int data_src__fprintf(u64 data_src, FILE *fp)
1366{ 1464{
1367 struct mem_info mi = { .data_src.val = data_src }; 1465 struct mem_info mi = { .data_src.val = data_src };
1368 char decode[100]; 1466 char decode[100];
@@ -1376,7 +1474,7 @@ static size_t data_src__printf(u64 data_src)
1376 if (maxlen < len) 1474 if (maxlen < len)
1377 maxlen = len; 1475 maxlen = len;
1378 1476
1379 return printf("%-*s", maxlen, out); 1477 return fprintf(fp, "%-*s", maxlen, out);
1380} 1478}
1381 1479
1382static void process_event(struct perf_script *script, 1480static void process_event(struct perf_script *script,
@@ -1387,14 +1485,18 @@ static void process_event(struct perf_script *script,
1387 struct thread *thread = al->thread; 1485 struct thread *thread = al->thread;
1388 struct perf_event_attr *attr = &evsel->attr; 1486 struct perf_event_attr *attr = &evsel->attr;
1389 unsigned int type = output_type(attr->type); 1487 unsigned int type = output_type(attr->type);
1488 struct perf_evsel_script *es = evsel->priv;
1489 FILE *fp = es->fp;
1390 1490
1391 if (output[type].fields == 0) 1491 if (output[type].fields == 0)
1392 return; 1492 return;
1393 1493
1394 print_sample_start(sample, thread, evsel); 1494 ++es->samples;
1495
1496 perf_sample__fprintf_start(sample, thread, evsel, fp);
1395 1497
1396 if (PRINT_FIELD(PERIOD)) 1498 if (PRINT_FIELD(PERIOD))
1397 printf("%10" PRIu64 " ", sample->period); 1499 fprintf(fp, "%10" PRIu64 " ", sample->period);
1398 1500
1399 if (PRINT_FIELD(EVNAME)) { 1501 if (PRINT_FIELD(EVNAME)) {
1400 const char *evname = perf_evsel__name(evsel); 1502 const char *evname = perf_evsel__name(evsel);
@@ -1402,33 +1504,33 @@ static void process_event(struct perf_script *script,
1402 if (!script->name_width) 1504 if (!script->name_width)
1403 script->name_width = perf_evlist__max_name_len(script->session->evlist); 1505 script->name_width = perf_evlist__max_name_len(script->session->evlist);
1404 1506
1405 printf("%*s: ", script->name_width, 1507 fprintf(fp, "%*s: ", script->name_width, evname ?: "[unknown]");
1406 evname ? evname : "[unknown]");
1407 } 1508 }
1408 1509
1409 if (print_flags) 1510 if (print_flags)
1410 print_sample_flags(sample->flags); 1511 perf_sample__fprintf_flags(sample->flags, fp);
1411 1512
1412 if (is_bts_event(attr)) { 1513 if (is_bts_event(attr)) {
1413 print_sample_bts(sample, evsel, thread, al, machine); 1514 perf_sample__fprintf_bts(sample, evsel, thread, al, machine, fp);
1414 return; 1515 return;
1415 } 1516 }
1416 1517
1417 if (PRINT_FIELD(TRACE)) 1518 if (PRINT_FIELD(TRACE)) {
1418 event_format__print(evsel->tp_format, sample->cpu, 1519 event_format__fprintf(evsel->tp_format, sample->cpu,
1419 sample->raw_data, sample->raw_size); 1520 sample->raw_data, sample->raw_size, fp);
1521 }
1420 1522
1421 if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH)) 1523 if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH))
1422 print_sample_synth(sample, evsel); 1524 perf_sample__fprintf_synth(sample, evsel, fp);
1423 1525
1424 if (PRINT_FIELD(ADDR)) 1526 if (PRINT_FIELD(ADDR))
1425 print_sample_addr(sample, thread, attr); 1527 perf_sample__fprintf_addr(sample, thread, attr, fp);
1426 1528
1427 if (PRINT_FIELD(DATA_SRC)) 1529 if (PRINT_FIELD(DATA_SRC))
1428 data_src__printf(sample->data_src); 1530 data_src__fprintf(sample->data_src, fp);
1429 1531
1430 if (PRINT_FIELD(WEIGHT)) 1532 if (PRINT_FIELD(WEIGHT))
1431 printf("%16" PRIu64, sample->weight); 1533 fprintf(fp, "%16" PRIu64, sample->weight);
1432 1534
1433 if (PRINT_FIELD(IP)) { 1535 if (PRINT_FIELD(IP)) {
1434 struct callchain_cursor *cursor = NULL; 1536 struct callchain_cursor *cursor = NULL;
@@ -1438,27 +1540,30 @@ static void process_event(struct perf_script *script,
1438 sample, NULL, NULL, scripting_max_stack) == 0) 1540 sample, NULL, NULL, scripting_max_stack) == 0)
1439 cursor = &callchain_cursor; 1541 cursor = &callchain_cursor;
1440 1542
1441 putchar(cursor ? '\n' : ' '); 1543 fputc(cursor ? '\n' : ' ', fp);
1442 sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, stdout); 1544 sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, fp);
1443 } 1545 }
1444 1546
1445 if (PRINT_FIELD(IREGS)) 1547 if (PRINT_FIELD(IREGS))
1446 print_sample_iregs(sample, attr); 1548 perf_sample__fprintf_iregs(sample, attr, fp);
1549
1550 if (PRINT_FIELD(UREGS))
1551 perf_sample__fprintf_uregs(sample, attr, fp);
1447 1552
1448 if (PRINT_FIELD(BRSTACK)) 1553 if (PRINT_FIELD(BRSTACK))
1449 print_sample_brstack(sample, thread, attr); 1554 perf_sample__fprintf_brstack(sample, thread, attr, fp);
1450 else if (PRINT_FIELD(BRSTACKSYM)) 1555 else if (PRINT_FIELD(BRSTACKSYM))
1451 print_sample_brstacksym(sample, thread, attr); 1556 perf_sample__fprintf_brstacksym(sample, thread, attr, fp);
1452 else if (PRINT_FIELD(BRSTACKOFF)) 1557 else if (PRINT_FIELD(BRSTACKOFF))
1453 print_sample_brstackoff(sample, thread, attr); 1558 perf_sample__fprintf_brstackoff(sample, thread, attr, fp);
1454 1559
1455 if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) 1560 if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
1456 print_sample_bpf_output(sample); 1561 perf_sample__fprintf_bpf_output(sample, fp);
1457 print_insn(sample, attr, thread, machine); 1562 perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
1458 1563
1459 if (PRINT_FIELD(PHYS_ADDR)) 1564 if (PRINT_FIELD(PHYS_ADDR))
1460 printf("%16" PRIx64, sample->phys_addr); 1565 fprintf(fp, "%16" PRIx64, sample->phys_addr);
1461 printf("\n"); 1566 fprintf(fp, "\n");
1462} 1567}
1463 1568
1464static struct scripting_ops *scripting_ops; 1569static struct scripting_ops *scripting_ops;
@@ -1632,7 +1737,7 @@ static int process_comm_event(struct perf_tool *tool,
1632 sample->tid = event->comm.tid; 1737 sample->tid = event->comm.tid;
1633 sample->pid = event->comm.pid; 1738 sample->pid = event->comm.pid;
1634 } 1739 }
1635 print_sample_start(sample, thread, evsel); 1740 perf_sample__fprintf_start(sample, thread, evsel, stdout);
1636 perf_event__fprintf(event, stdout); 1741 perf_event__fprintf(event, stdout);
1637 ret = 0; 1742 ret = 0;
1638out: 1743out:
@@ -1667,7 +1772,7 @@ static int process_namespaces_event(struct perf_tool *tool,
1667 sample->tid = event->namespaces.tid; 1772 sample->tid = event->namespaces.tid;
1668 sample->pid = event->namespaces.pid; 1773 sample->pid = event->namespaces.pid;
1669 } 1774 }
1670 print_sample_start(sample, thread, evsel); 1775 perf_sample__fprintf_start(sample, thread, evsel, stdout);
1671 perf_event__fprintf(event, stdout); 1776 perf_event__fprintf(event, stdout);
1672 ret = 0; 1777 ret = 0;
1673out: 1778out:
@@ -1700,7 +1805,7 @@ static int process_fork_event(struct perf_tool *tool,
1700 sample->tid = event->fork.tid; 1805 sample->tid = event->fork.tid;
1701 sample->pid = event->fork.pid; 1806 sample->pid = event->fork.pid;
1702 } 1807 }
1703 print_sample_start(sample, thread, evsel); 1808 perf_sample__fprintf_start(sample, thread, evsel, stdout);
1704 perf_event__fprintf(event, stdout); 1809 perf_event__fprintf(event, stdout);
1705 thread__put(thread); 1810 thread__put(thread);
1706 1811
@@ -1729,7 +1834,7 @@ static int process_exit_event(struct perf_tool *tool,
1729 sample->tid = event->fork.tid; 1834 sample->tid = event->fork.tid;
1730 sample->pid = event->fork.pid; 1835 sample->pid = event->fork.pid;
1731 } 1836 }
1732 print_sample_start(sample, thread, evsel); 1837 perf_sample__fprintf_start(sample, thread, evsel, stdout);
1733 perf_event__fprintf(event, stdout); 1838 perf_event__fprintf(event, stdout);
1734 1839
1735 if (perf_event__process_exit(tool, event, sample, machine) < 0) 1840 if (perf_event__process_exit(tool, event, sample, machine) < 0)
@@ -1764,7 +1869,7 @@ static int process_mmap_event(struct perf_tool *tool,
1764 sample->tid = event->mmap.tid; 1869 sample->tid = event->mmap.tid;
1765 sample->pid = event->mmap.pid; 1870 sample->pid = event->mmap.pid;
1766 } 1871 }
1767 print_sample_start(sample, thread, evsel); 1872 perf_sample__fprintf_start(sample, thread, evsel, stdout);
1768 perf_event__fprintf(event, stdout); 1873 perf_event__fprintf(event, stdout);
1769 thread__put(thread); 1874 thread__put(thread);
1770 return 0; 1875 return 0;
@@ -1795,7 +1900,7 @@ static int process_mmap2_event(struct perf_tool *tool,
1795 sample->tid = event->mmap2.tid; 1900 sample->tid = event->mmap2.tid;
1796 sample->pid = event->mmap2.pid; 1901 sample->pid = event->mmap2.pid;
1797 } 1902 }
1798 print_sample_start(sample, thread, evsel); 1903 perf_sample__fprintf_start(sample, thread, evsel, stdout);
1799 perf_event__fprintf(event, stdout); 1904 perf_event__fprintf(event, stdout);
1800 thread__put(thread); 1905 thread__put(thread);
1801 return 0; 1906 return 0;
@@ -1821,7 +1926,7 @@ static int process_switch_event(struct perf_tool *tool,
1821 return -1; 1926 return -1;
1822 } 1927 }
1823 1928
1824 print_sample_start(sample, thread, evsel); 1929 perf_sample__fprintf_start(sample, thread, evsel, stdout);
1825 perf_event__fprintf(event, stdout); 1930 perf_event__fprintf(event, stdout);
1826 thread__put(thread); 1931 thread__put(thread);
1827 return 0; 1932 return 0;
@@ -1832,6 +1937,65 @@ static void sig_handler(int sig __maybe_unused)
1832 session_done = 1; 1937 session_done = 1;
1833} 1938}
1834 1939
1940static void perf_script__fclose_per_event_dump(struct perf_script *script)
1941{
1942 struct perf_evlist *evlist = script->session->evlist;
1943 struct perf_evsel *evsel;
1944
1945 evlist__for_each_entry(evlist, evsel) {
1946 if (!evsel->priv)
1947 break;
1948 perf_evsel_script__delete(evsel->priv);
1949 evsel->priv = NULL;
1950 }
1951}
1952
1953static int perf_script__fopen_per_event_dump(struct perf_script *script)
1954{
1955 struct perf_evsel *evsel;
1956
1957 evlist__for_each_entry(script->session->evlist, evsel) {
1958 evsel->priv = perf_evsel_script__new(evsel, script->session->data);
1959 if (evsel->priv == NULL)
1960 goto out_err_fclose;
1961 }
1962
1963 return 0;
1964
1965out_err_fclose:
1966 perf_script__fclose_per_event_dump(script);
1967 return -1;
1968}
1969
1970static int perf_script__setup_per_event_dump(struct perf_script *script)
1971{
1972 struct perf_evsel *evsel;
1973 static struct perf_evsel_script es_stdout;
1974
1975 if (script->per_event_dump)
1976 return perf_script__fopen_per_event_dump(script);
1977
1978 es_stdout.fp = stdout;
1979
1980 evlist__for_each_entry(script->session->evlist, evsel)
1981 evsel->priv = &es_stdout;
1982
1983 return 0;
1984}
1985
1986static void perf_script__exit_per_event_dump_stats(struct perf_script *script)
1987{
1988 struct perf_evsel *evsel;
1989
1990 evlist__for_each_entry(script->session->evlist, evsel) {
1991 struct perf_evsel_script *es = evsel->priv;
1992
1993 perf_evsel_script__fprintf(es, stdout);
1994 perf_evsel_script__delete(es);
1995 evsel->priv = NULL;
1996 }
1997}
1998
1835static int __cmd_script(struct perf_script *script) 1999static int __cmd_script(struct perf_script *script)
1836{ 2000{
1837 int ret; 2001 int ret;
@@ -1853,8 +2017,16 @@ static int __cmd_script(struct perf_script *script)
1853 if (script->show_namespace_events) 2017 if (script->show_namespace_events)
1854 script->tool.namespaces = process_namespaces_event; 2018 script->tool.namespaces = process_namespaces_event;
1855 2019
2020 if (perf_script__setup_per_event_dump(script)) {
2021 pr_err("Couldn't create the per event dump files\n");
2022 return -1;
2023 }
2024
1856 ret = perf_session__process_events(script->session); 2025 ret = perf_session__process_events(script->session);
1857 2026
2027 if (script->per_event_dump)
2028 perf_script__exit_per_event_dump_stats(script);
2029
1858 if (debug_mode) 2030 if (debug_mode)
1859 pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); 2031 pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered);
1860 2032
@@ -2419,14 +2591,16 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
2419 char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; 2591 char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
2420 DIR *scripts_dir, *lang_dir; 2592 DIR *scripts_dir, *lang_dir;
2421 struct perf_session *session; 2593 struct perf_session *session;
2422 struct perf_data_file file = { 2594 struct perf_data data = {
2423 .path = input_name, 2595 .file = {
2424 .mode = PERF_DATA_MODE_READ, 2596 .path = input_name,
2597 },
2598 .mode = PERF_DATA_MODE_READ,
2425 }; 2599 };
2426 char *temp; 2600 char *temp;
2427 int i = 0; 2601 int i = 0;
2428 2602
2429 session = perf_session__new(&file, false, NULL); 2603 session = perf_session__new(&data, false, NULL);
2430 if (!session) 2604 if (!session)
2431 return -1; 2605 return -1;
2432 2606
@@ -2704,7 +2878,7 @@ int cmd_script(int argc, const char **argv)
2704 .ordering_requires_timestamps = true, 2878 .ordering_requires_timestamps = true,
2705 }, 2879 },
2706 }; 2880 };
2707 struct perf_data_file file = { 2881 struct perf_data data = {
2708 .mode = PERF_DATA_MODE_READ, 2882 .mode = PERF_DATA_MODE_READ,
2709 }; 2883 };
2710 const struct option options[] = { 2884 const struct option options[] = {
@@ -2740,7 +2914,7 @@ int cmd_script(int argc, const char **argv)
2740 "+field to add and -field to remove." 2914 "+field to add and -field to remove."
2741 "Valid types: hw,sw,trace,raw,synth. " 2915 "Valid types: hw,sw,trace,raw,synth. "
2742 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 2916 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
2743 "addr,symoff,period,iregs,brstack,brstacksym,flags," 2917 "addr,symoff,period,iregs,uregs,brstack,brstacksym,flags,"
2744 "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr", 2918 "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
2745 parse_output_fields), 2919 parse_output_fields),
2746 OPT_BOOLEAN('a', "all-cpus", &system_wide, 2920 OPT_BOOLEAN('a', "all-cpus", &system_wide,
@@ -2772,6 +2946,8 @@ int cmd_script(int argc, const char **argv)
2772 "Show context switch events (if recorded)"), 2946 "Show context switch events (if recorded)"),
2773 OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, 2947 OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
2774 "Show namespace events (if recorded)"), 2948 "Show namespace events (if recorded)"),
2949 OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
2950 "Dump trace output to files named by the monitored events"),
2775 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), 2951 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
2776 OPT_INTEGER(0, "max-blocks", &max_blocks, 2952 OPT_INTEGER(0, "max-blocks", &max_blocks,
2777 "Maximum number of code blocks to dump with brstackinsn"), 2953 "Maximum number of code blocks to dump with brstackinsn"),
@@ -2802,13 +2978,15 @@ int cmd_script(int argc, const char **argv)
2802 NULL 2978 NULL
2803 }; 2979 };
2804 2980
2981 perf_set_singlethreaded();
2982
2805 setup_scripting(); 2983 setup_scripting();
2806 2984
2807 argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, 2985 argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
2808 PARSE_OPT_STOP_AT_NON_OPTION); 2986 PARSE_OPT_STOP_AT_NON_OPTION);
2809 2987
2810 file.path = input_name; 2988 data.file.path = input_name;
2811 file.force = symbol_conf.force; 2989 data.force = symbol_conf.force;
2812 2990
2813 if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { 2991 if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
2814 rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); 2992 rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);
@@ -2975,7 +3153,7 @@ int cmd_script(int argc, const char **argv)
2975 if (!script_name) 3153 if (!script_name)
2976 setup_pager(); 3154 setup_pager();
2977 3155
2978 session = perf_session__new(&file, false, &script.tool); 3156 session = perf_session__new(&data, false, &script.tool);
2979 if (session == NULL) 3157 if (session == NULL)
2980 return -1; 3158 return -1;
2981 3159
@@ -3016,7 +3194,8 @@ int cmd_script(int argc, const char **argv)
3016 machine__resolve_kernel_addr, 3194 machine__resolve_kernel_addr,
3017 &session->machines.host) < 0) { 3195 &session->machines.host) < 0) {
3018 pr_err("%s: failed to set libtraceevent function resolver\n", __func__); 3196 pr_err("%s: failed to set libtraceevent function resolver\n", __func__);
3019 return -1; 3197 err = -1;
3198 goto out_delete;
3020 } 3199 }
3021 3200
3022 if (generate_script_lang) { 3201 if (generate_script_lang) {
@@ -3030,7 +3209,7 @@ int cmd_script(int argc, const char **argv)
3030 goto out_delete; 3209 goto out_delete;
3031 } 3210 }
3032 3211
3033 input = open(file.path, O_RDONLY); /* input_name */ 3212 input = open(data.file.path, O_RDONLY); /* input_name */
3034 if (input < 0) { 3213 if (input < 0) {
3035 err = -errno; 3214 err = -errno;
3036 perror("failed to open file"); 3215 perror("failed to open file");
@@ -3076,7 +3255,8 @@ int cmd_script(int argc, const char **argv)
3076 /* needs to be parsed after looking up reference time */ 3255 /* needs to be parsed after looking up reference time */
3077 if (perf_time__parse_str(&script.ptime, script.time_str) != 0) { 3256 if (perf_time__parse_str(&script.ptime, script.time_str) != 0) {
3078 pr_err("Invalid time string\n"); 3257 pr_err("Invalid time string\n");
3079 return -EINVAL; 3258 err = -EINVAL;
3259 goto out_delete;
3080 } 3260 }
3081 3261
3082 err = __cmd_script(&script); 3262 err = __cmd_script(&script);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 69523ed55894..59af5a8419e2 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,7 @@
65#include "util/tool.h" 65#include "util/tool.h"
66#include "util/group.h" 66#include "util/group.h"
67#include "util/string2.h" 67#include "util/string2.h"
68#include "util/metricgroup.h"
68#include "asm/bug.h" 69#include "asm/bug.h"
69 70
70#include <linux/time64.h> 71#include <linux/time64.h>
@@ -133,6 +134,8 @@ static const char *smi_cost_attrs = {
133 134
134static struct perf_evlist *evsel_list; 135static struct perf_evlist *evsel_list;
135 136
137static struct rblist metric_events;
138
136static struct target target = { 139static struct target target = {
137 .uid = UINT_MAX, 140 .uid = UINT_MAX,
138}; 141};
@@ -172,7 +175,7 @@ static int print_free_counters_hint;
172 175
173struct perf_stat { 176struct perf_stat {
174 bool record; 177 bool record;
175 struct perf_data_file file; 178 struct perf_data data;
176 struct perf_session *session; 179 struct perf_session *session;
177 u64 bytes_written; 180 u64 bytes_written;
178 struct perf_tool tool; 181 struct perf_tool tool;
@@ -192,6 +195,11 @@ static struct perf_stat_config stat_config = {
192 .scale = true, 195 .scale = true,
193}; 196};
194 197
198static bool is_duration_time(struct perf_evsel *evsel)
199{
200 return !strcmp(evsel->name, "duration_time");
201}
202
195static inline void diff_timespec(struct timespec *r, struct timespec *a, 203static inline void diff_timespec(struct timespec *r, struct timespec *a,
196 struct timespec *b) 204 struct timespec *b)
197{ 205{
@@ -245,7 +253,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
245 * by attr->sample_type != 0, and we can't run it on 253 * by attr->sample_type != 0, and we can't run it on
246 * stat sessions. 254 * stat sessions.
247 */ 255 */
248 if (!(STAT_RECORD && perf_stat.file.is_pipe)) 256 if (!(STAT_RECORD && perf_stat.data.is_pipe))
249 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 257 attr->sample_type = PERF_SAMPLE_IDENTIFIER;
250 258
251 /* 259 /*
@@ -287,7 +295,7 @@ static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
287 struct perf_sample *sample __maybe_unused, 295 struct perf_sample *sample __maybe_unused,
288 struct machine *machine __maybe_unused) 296 struct machine *machine __maybe_unused)
289{ 297{
290 if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) { 298 if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
291 pr_err("failed to write perf data, error: %m\n"); 299 pr_err("failed to write perf data, error: %m\n");
292 return -1; 300 return -1;
293 } 301 }
@@ -407,6 +415,8 @@ static void process_interval(void)
407 pr_err("failed to write stat round event\n"); 415 pr_err("failed to write stat round event\n");
408 } 416 }
409 417
418 init_stats(&walltime_nsecs_stats);
419 update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
410 print_counters(&rs, 0, NULL); 420 print_counters(&rs, 0, NULL);
411} 421}
412 422
@@ -582,6 +592,32 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter)
582 return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; 592 return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
583} 593}
584 594
595static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
596{
597 struct perf_evsel *c2, *leader;
598 bool is_open = true;
599
600 leader = evsel->leader;
601 pr_debug("Weak group for %s/%d failed\n",
602 leader->name, leader->nr_members);
603
604 /*
605 * for_each_group_member doesn't work here because it doesn't
606 * include the first entry.
607 */
608 evlist__for_each_entry(evsel_list, c2) {
609 if (c2 == evsel)
610 is_open = false;
611 if (c2->leader == leader) {
612 if (is_open)
613 perf_evsel__close(c2);
614 c2->leader = c2;
615 c2->nr_members = 0;
616 }
617 }
618 return leader;
619}
620
585static int __run_perf_stat(int argc, const char **argv) 621static int __run_perf_stat(int argc, const char **argv)
586{ 622{
587 int interval = stat_config.interval; 623 int interval = stat_config.interval;
@@ -592,7 +628,7 @@ static int __run_perf_stat(int argc, const char **argv)
592 size_t l; 628 size_t l;
593 int status = 0; 629 int status = 0;
594 const bool forks = (argc > 0); 630 const bool forks = (argc > 0);
595 bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false; 631 bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
596 struct perf_evsel_config_term *err_term; 632 struct perf_evsel_config_term *err_term;
597 633
598 if (interval) { 634 if (interval) {
@@ -618,6 +654,15 @@ static int __run_perf_stat(int argc, const char **argv)
618 evlist__for_each_entry(evsel_list, counter) { 654 evlist__for_each_entry(evsel_list, counter) {
619try_again: 655try_again:
620 if (create_perf_stat_counter(counter) < 0) { 656 if (create_perf_stat_counter(counter) < 0) {
657
658 /* Weak group failed. Reset the group. */
659 if ((errno == EINVAL || errno == EBADF) &&
660 counter->leader != counter &&
661 counter->weak_group) {
662 counter = perf_evsel__reset_weak_group(counter);
663 goto try_again;
664 }
665
621 /* 666 /*
622 * PPC returns ENXIO for HW counters until 2.6.37 667 * PPC returns ENXIO for HW counters until 2.6.37
623 * (behavior changed with commit b0a873e). 668 * (behavior changed with commit b0a873e).
@@ -674,10 +719,10 @@ try_again:
674 } 719 }
675 720
676 if (STAT_RECORD) { 721 if (STAT_RECORD) {
677 int err, fd = perf_data_file__fd(&perf_stat.file); 722 int err, fd = perf_data__fd(&perf_stat.data);
678 723
679 if (is_pipe) { 724 if (is_pipe) {
680 err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file)); 725 err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
681 } else { 726 } else {
682 err = perf_session__write_header(perf_stat.session, evsel_list, 727 err = perf_session__write_header(perf_stat.session, evsel_list,
683 fd, false); 728 fd, false);
@@ -800,7 +845,7 @@ static void print_noise(struct perf_evsel *evsel, double avg)
800 if (run_count == 1) 845 if (run_count == 1)
801 return; 846 return;
802 847
803 ps = evsel->priv; 848 ps = evsel->stats;
804 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 849 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
805} 850}
806 851
@@ -1199,7 +1244,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
1199 1244
1200 perf_stat__print_shadow_stats(counter, uval, 1245 perf_stat__print_shadow_stats(counter, uval,
1201 first_shadow_cpu(counter, id), 1246 first_shadow_cpu(counter, id),
1202 &out); 1247 &out, &metric_events);
1203 if (!csv_output && !metric_only) { 1248 if (!csv_output && !metric_only) {
1204 print_noise(counter, noise); 1249 print_noise(counter, noise);
1205 print_running(run, ena); 1250 print_running(run, ena);
@@ -1222,8 +1267,7 @@ static void aggr_update_shadow(void)
1222 continue; 1267 continue;
1223 val += perf_counts(counter->counts, cpu, 0)->val; 1268 val += perf_counts(counter->counts, cpu, 0)->val;
1224 } 1269 }
1225 val = val * counter->scale; 1270 perf_stat__update_shadow_stats(counter, val,
1226 perf_stat__update_shadow_stats(counter, &val,
1227 first_shadow_cpu(counter, id)); 1271 first_shadow_cpu(counter, id));
1228 } 1272 }
1229 } 1273 }
@@ -1325,6 +1369,9 @@ static void print_aggr(char *prefix)
1325 ad.id = id = aggr_map->map[s]; 1369 ad.id = id = aggr_map->map[s];
1326 first = true; 1370 first = true;
1327 evlist__for_each_entry(evsel_list, counter) { 1371 evlist__for_each_entry(evsel_list, counter) {
1372 if (is_duration_time(counter))
1373 continue;
1374
1328 ad.val = ad.ena = ad.run = 0; 1375 ad.val = ad.ena = ad.run = 0;
1329 ad.nr = 0; 1376 ad.nr = 0;
1330 if (!collect_data(counter, aggr_cb, &ad)) 1377 if (!collect_data(counter, aggr_cb, &ad))
@@ -1384,7 +1431,7 @@ static void counter_aggr_cb(struct perf_evsel *counter, void *data,
1384 bool first __maybe_unused) 1431 bool first __maybe_unused)
1385{ 1432{
1386 struct caggr_data *cd = data; 1433 struct caggr_data *cd = data;
1387 struct perf_stat_evsel *ps = counter->priv; 1434 struct perf_stat_evsel *ps = counter->stats;
1388 1435
1389 cd->avg += avg_stats(&ps->res_stats[0]); 1436 cd->avg += avg_stats(&ps->res_stats[0]);
1390 cd->avg_enabled += avg_stats(&ps->res_stats[1]); 1437 cd->avg_enabled += avg_stats(&ps->res_stats[1]);
@@ -1468,6 +1515,8 @@ static void print_no_aggr_metric(char *prefix)
1468 if (prefix) 1515 if (prefix)
1469 fputs(prefix, stat_config.output); 1516 fputs(prefix, stat_config.output);
1470 evlist__for_each_entry(evsel_list, counter) { 1517 evlist__for_each_entry(evsel_list, counter) {
1518 if (is_duration_time(counter))
1519 continue;
1471 if (first) { 1520 if (first) {
1472 aggr_printout(counter, cpu, 0); 1521 aggr_printout(counter, cpu, 0);
1473 first = false; 1522 first = false;
@@ -1522,6 +1571,8 @@ static void print_metric_headers(const char *prefix, bool no_indent)
1522 1571
1523 /* Print metrics headers only */ 1572 /* Print metrics headers only */
1524 evlist__for_each_entry(evsel_list, counter) { 1573 evlist__for_each_entry(evsel_list, counter) {
1574 if (is_duration_time(counter))
1575 continue;
1525 os.evsel = counter; 1576 os.evsel = counter;
1526 out.ctx = &os; 1577 out.ctx = &os;
1527 out.print_metric = print_metric_header; 1578 out.print_metric = print_metric_header;
@@ -1530,7 +1581,8 @@ static void print_metric_headers(const char *prefix, bool no_indent)
1530 os.evsel = counter; 1581 os.evsel = counter;
1531 perf_stat__print_shadow_stats(counter, 0, 1582 perf_stat__print_shadow_stats(counter, 0,
1532 0, 1583 0,
1533 &out); 1584 &out,
1585 &metric_events);
1534 } 1586 }
1535 fputc('\n', stat_config.output); 1587 fputc('\n', stat_config.output);
1536} 1588}
@@ -1643,7 +1695,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
1643 char buf[64], *prefix = NULL; 1695 char buf[64], *prefix = NULL;
1644 1696
1645 /* Do not print anything if we record to the pipe. */ 1697 /* Do not print anything if we record to the pipe. */
1646 if (STAT_RECORD && perf_stat.file.is_pipe) 1698 if (STAT_RECORD && perf_stat.data.is_pipe)
1647 return; 1699 return;
1648 1700
1649 if (interval) 1701 if (interval)
@@ -1668,12 +1720,18 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
1668 print_aggr(prefix); 1720 print_aggr(prefix);
1669 break; 1721 break;
1670 case AGGR_THREAD: 1722 case AGGR_THREAD:
1671 evlist__for_each_entry(evsel_list, counter) 1723 evlist__for_each_entry(evsel_list, counter) {
1724 if (is_duration_time(counter))
1725 continue;
1672 print_aggr_thread(counter, prefix); 1726 print_aggr_thread(counter, prefix);
1727 }
1673 break; 1728 break;
1674 case AGGR_GLOBAL: 1729 case AGGR_GLOBAL:
1675 evlist__for_each_entry(evsel_list, counter) 1730 evlist__for_each_entry(evsel_list, counter) {
1731 if (is_duration_time(counter))
1732 continue;
1676 print_counter_aggr(counter, prefix); 1733 print_counter_aggr(counter, prefix);
1734 }
1677 if (metric_only) 1735 if (metric_only)
1678 fputc('\n', stat_config.output); 1736 fputc('\n', stat_config.output);
1679 break; 1737 break;
@@ -1681,8 +1739,11 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
1681 if (metric_only) 1739 if (metric_only)
1682 print_no_aggr_metric(prefix); 1740 print_no_aggr_metric(prefix);
1683 else { 1741 else {
1684 evlist__for_each_entry(evsel_list, counter) 1742 evlist__for_each_entry(evsel_list, counter) {
1743 if (is_duration_time(counter))
1744 continue;
1685 print_counter(counter, prefix); 1745 print_counter(counter, prefix);
1746 }
1686 } 1747 }
1687 break; 1748 break;
1688 case AGGR_UNSET: 1749 case AGGR_UNSET:
@@ -1754,6 +1815,13 @@ static int enable_metric_only(const struct option *opt __maybe_unused,
1754 return 0; 1815 return 0;
1755} 1816}
1756 1817
1818static int parse_metric_groups(const struct option *opt,
1819 const char *str,
1820 int unset __maybe_unused)
1821{
1822 return metricgroup__parse_groups(opt, str, &metric_events);
1823}
1824
1757static const struct option stat_options[] = { 1825static const struct option stat_options[] = {
1758 OPT_BOOLEAN('T', "transaction", &transaction_run, 1826 OPT_BOOLEAN('T', "transaction", &transaction_run,
1759 "hardware transaction statistics"), 1827 "hardware transaction statistics"),
@@ -1819,6 +1887,9 @@ static const struct option stat_options[] = {
1819 "measure topdown level 1 statistics"), 1887 "measure topdown level 1 statistics"),
1820 OPT_BOOLEAN(0, "smi-cost", &smi_cost, 1888 OPT_BOOLEAN(0, "smi-cost", &smi_cost,
1821 "measure SMI cost"), 1889 "measure SMI cost"),
1890 OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
1891 "monitor specified metrics or metric groups (separated by ,)",
1892 parse_metric_groups),
1822 OPT_END() 1893 OPT_END()
1823}; 1894};
1824 1895
@@ -2334,20 +2405,20 @@ static void init_features(struct perf_session *session)
2334static int __cmd_record(int argc, const char **argv) 2405static int __cmd_record(int argc, const char **argv)
2335{ 2406{
2336 struct perf_session *session; 2407 struct perf_session *session;
2337 struct perf_data_file *file = &perf_stat.file; 2408 struct perf_data *data = &perf_stat.data;
2338 2409
2339 argc = parse_options(argc, argv, stat_options, stat_record_usage, 2410 argc = parse_options(argc, argv, stat_options, stat_record_usage,
2340 PARSE_OPT_STOP_AT_NON_OPTION); 2411 PARSE_OPT_STOP_AT_NON_OPTION);
2341 2412
2342 if (output_name) 2413 if (output_name)
2343 file->path = output_name; 2414 data->file.path = output_name;
2344 2415
2345 if (run_count != 1 || forever) { 2416 if (run_count != 1 || forever) {
2346 pr_err("Cannot use -r option with perf stat record.\n"); 2417 pr_err("Cannot use -r option with perf stat record.\n");
2347 return -1; 2418 return -1;
2348 } 2419 }
2349 2420
2350 session = perf_session__new(file, false, NULL); 2421 session = perf_session__new(data, false, NULL);
2351 if (session == NULL) { 2422 if (session == NULL) {
2352 pr_err("Perf session creation failed.\n"); 2423 pr_err("Perf session creation failed.\n");
2353 return -1; 2424 return -1;
@@ -2405,7 +2476,7 @@ int process_stat_config_event(struct perf_tool *tool,
2405 if (st->aggr_mode != AGGR_UNSET) 2476 if (st->aggr_mode != AGGR_UNSET)
2406 stat_config.aggr_mode = st->aggr_mode; 2477 stat_config.aggr_mode = st->aggr_mode;
2407 2478
2408 if (perf_stat.file.is_pipe) 2479 if (perf_stat.data.is_pipe)
2409 perf_stat_init_aggr_mode(); 2480 perf_stat_init_aggr_mode();
2410 else 2481 else
2411 perf_stat_init_aggr_mode_file(st); 2482 perf_stat_init_aggr_mode_file(st);
@@ -2513,10 +2584,10 @@ static int __cmd_report(int argc, const char **argv)
2513 input_name = "perf.data"; 2584 input_name = "perf.data";
2514 } 2585 }
2515 2586
2516 perf_stat.file.path = input_name; 2587 perf_stat.data.file.path = input_name;
2517 perf_stat.file.mode = PERF_DATA_MODE_READ; 2588 perf_stat.data.mode = PERF_DATA_MODE_READ;
2518 2589
2519 session = perf_session__new(&perf_stat.file, false, &perf_stat.tool); 2590 session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
2520 if (session == NULL) 2591 if (session == NULL)
2521 return -1; 2592 return -1;
2522 2593
@@ -2787,7 +2858,7 @@ int cmd_stat(int argc, const char **argv)
2787 * records, but the need to suppress the kptr_restrict messages in older 2858 * records, but the need to suppress the kptr_restrict messages in older
2788 * tools remain -acme 2859 * tools remain -acme
2789 */ 2860 */
2790 int fd = perf_data_file__fd(&perf_stat.file); 2861 int fd = perf_data__fd(&perf_stat.data);
2791 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2862 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
2792 process_synthesized_event, 2863 process_synthesized_event,
2793 &perf_stat.session->machines.host); 2864 &perf_stat.session->machines.host);
@@ -2801,7 +2872,7 @@ int cmd_stat(int argc, const char **argv)
2801 pr_err("failed to write stat round event\n"); 2872 pr_err("failed to write stat round event\n");
2802 } 2873 }
2803 2874
2804 if (!perf_stat.file.is_pipe) { 2875 if (!perf_stat.data.is_pipe) {
2805 perf_stat.session->header.data_size += perf_stat.bytes_written; 2876 perf_stat.session->header.data_size += perf_stat.bytes_written;
2806 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 2877 perf_session__write_header(perf_stat.session, evsel_list, fd, true);
2807 } 2878 }
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 4e2e61695986..813698a9b8c7 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1601,13 +1601,15 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name)
1601 { "syscalls:sys_exit_pselect6", process_exit_poll }, 1601 { "syscalls:sys_exit_pselect6", process_exit_poll },
1602 { "syscalls:sys_exit_select", process_exit_poll }, 1602 { "syscalls:sys_exit_select", process_exit_poll },
1603 }; 1603 };
1604 struct perf_data_file file = { 1604 struct perf_data data = {
1605 .path = input_name, 1605 .file = {
1606 .mode = PERF_DATA_MODE_READ, 1606 .path = input_name,
1607 .force = tchart->force, 1607 },
1608 .mode = PERF_DATA_MODE_READ,
1609 .force = tchart->force,
1608 }; 1610 };
1609 1611
1610 struct perf_session *session = perf_session__new(&file, false, 1612 struct perf_session *session = perf_session__new(&data, false,
1611 &tchart->tool); 1613 &tchart->tool);
1612 int ret = -EINVAL; 1614 int ret = -EINVAL;
1613 1615
@@ -1617,7 +1619,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name)
1617 symbol__init(&session->header.env); 1619 symbol__init(&session->header.env);
1618 1620
1619 (void)perf_header__process_sections(&session->header, 1621 (void)perf_header__process_sections(&session->header,
1620 perf_data_file__fd(session->file), 1622 perf_data__fd(session->data),
1621 tchart, 1623 tchart,
1622 process_header); 1624 process_header);
1623 1625
@@ -1732,8 +1734,10 @@ static int timechart__io_record(int argc, const char **argv)
1732 if (rec_argv == NULL) 1734 if (rec_argv == NULL)
1733 return -ENOMEM; 1735 return -ENOMEM;
1734 1736
1735 if (asprintf(&filter, "common_pid != %d", getpid()) < 0) 1737 if (asprintf(&filter, "common_pid != %d", getpid()) < 0) {
1738 free(rec_argv);
1736 return -ENOMEM; 1739 return -ENOMEM;
1740 }
1737 1741
1738 p = rec_argv; 1742 p = rec_argv;
1739 for (i = 0; i < common_args_nr; i++) 1743 for (i = 0; i < common_args_nr; i++)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ee954bde7e3e..477a8699f0b5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -958,8 +958,16 @@ static int __cmd_top(struct perf_top *top)
958 if (perf_session__register_idle_thread(top->session) < 0) 958 if (perf_session__register_idle_thread(top->session) < 0)
959 goto out_delete; 959 goto out_delete;
960 960
961 if (top->nr_threads_synthesize > 1)
962 perf_set_multithreaded();
963
961 machine__synthesize_threads(&top->session->machines.host, &opts->target, 964 machine__synthesize_threads(&top->session->machines.host, &opts->target,
962 top->evlist->threads, false, opts->proc_map_timeout); 965 top->evlist->threads, false,
966 opts->proc_map_timeout,
967 top->nr_threads_synthesize);
968
969 if (top->nr_threads_synthesize > 1)
970 perf_set_singlethreaded();
963 971
964 if (perf_hpp_list.socket) { 972 if (perf_hpp_list.socket) {
965 ret = perf_env__read_cpu_topology_map(&perf_env); 973 ret = perf_env__read_cpu_topology_map(&perf_env);
@@ -1112,6 +1120,7 @@ int cmd_top(int argc, const char **argv)
1112 }, 1120 },
1113 .max_stack = sysctl_perf_event_max_stack, 1121 .max_stack = sysctl_perf_event_max_stack,
1114 .sym_pcnt_filter = 5, 1122 .sym_pcnt_filter = 5,
1123 .nr_threads_synthesize = UINT_MAX,
1115 }; 1124 };
1116 struct record_opts *opts = &top.record_opts; 1125 struct record_opts *opts = &top.record_opts;
1117 struct target *target = &opts->target; 1126 struct target *target = &opts->target;
@@ -1221,6 +1230,8 @@ int cmd_top(int argc, const char **argv)
1221 OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, 1230 OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
1222 "Show entries in a hierarchy"), 1231 "Show entries in a hierarchy"),
1223 OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"), 1232 OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
1233 OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
1234 "number of thread to run event synthesize"),
1224 OPT_END() 1235 OPT_END()
1225 }; 1236 };
1226 const char * const top_usage[] = { 1237 const char * const top_usage[] = {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d5d7fff1c211..f2757d38c7d7 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -578,7 +578,6 @@ static struct syscall_fmt {
578} syscall_fmts[] = { 578} syscall_fmts[] = {
579 { .name = "access", 579 { .name = "access",
580 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, 580 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
581 { .name = "arch_prctl", .alias = "prctl", },
582 { .name = "bpf", 581 { .name = "bpf",
583 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, 582 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
584 { .name = "brk", .hexret = true, 583 { .name = "brk", .hexret = true,
@@ -634,6 +633,12 @@ static struct syscall_fmt {
634#else 633#else
635 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, 634 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
636#endif 635#endif
636 { .name = "kcmp", .nr_args = 5,
637 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
638 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
639 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
640 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
641 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
637 { .name = "keyctl", 642 { .name = "keyctl",
638 .arg = { [0] = STRARRAY(option, keyctl_options), }, }, 643 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
639 { .name = "kill", 644 { .name = "kill",
@@ -703,6 +708,10 @@ static struct syscall_fmt {
703 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, }, 708 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
704 { .name = "poll", .timeout = true, }, 709 { .name = "poll", .timeout = true, },
705 { .name = "ppoll", .timeout = true, }, 710 { .name = "ppoll", .timeout = true, },
711 { .name = "prctl", .alias = "arch_prctl",
712 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
713 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
714 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
706 { .name = "pread", .alias = "pread64", }, 715 { .name = "pread", .alias = "pread64", },
707 { .name = "preadv", .alias = "pread", }, 716 { .name = "preadv", .alias = "pread", },
708 { .name = "prlimit64", 717 { .name = "prlimit64",
@@ -985,6 +994,23 @@ size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
985 return printed; 994 return printed;
986} 995}
987 996
997size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
998{
999 size_t printed = scnprintf(bf, size, "%d", fd);
1000 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1001
1002 if (thread) {
1003 const char *path = thread__fd_path(thread, fd, trace);
1004
1005 if (path)
1006 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1007
1008 thread__put(thread);
1009 }
1010
1011 return printed;
1012}
1013
988static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1014static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
989 struct syscall_arg *arg) 1015 struct syscall_arg *arg)
990{ 1016{
@@ -1131,7 +1157,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1131 1157
1132 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1158 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1133 evlist->threads, trace__tool_process, false, 1159 evlist->threads, trace__tool_process, false,
1134 trace->opts.proc_map_timeout); 1160 trace->opts.proc_map_timeout, 1);
1135 if (err) 1161 if (err)
1136 symbol__exit(); 1162 symbol__exit();
1137 1163
@@ -1836,16 +1862,14 @@ out_dump:
1836 goto out_put; 1862 goto out_put;
1837} 1863}
1838 1864
1839static void bpf_output__printer(enum binary_printer_ops op, 1865static int bpf_output__printer(enum binary_printer_ops op,
1840 unsigned int val, void *extra) 1866 unsigned int val, void *extra __maybe_unused, FILE *fp)
1841{ 1867{
1842 FILE *output = extra;
1843 unsigned char ch = (unsigned char)val; 1868 unsigned char ch = (unsigned char)val;
1844 1869
1845 switch (op) { 1870 switch (op) {
1846 case BINARY_PRINT_CHAR_DATA: 1871 case BINARY_PRINT_CHAR_DATA:
1847 fprintf(output, "%c", isprint(ch) ? ch : '.'); 1872 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
1848 break;
1849 case BINARY_PRINT_DATA_BEGIN: 1873 case BINARY_PRINT_DATA_BEGIN:
1850 case BINARY_PRINT_LINE_BEGIN: 1874 case BINARY_PRINT_LINE_BEGIN:
1851 case BINARY_PRINT_ADDR: 1875 case BINARY_PRINT_ADDR:
@@ -1858,13 +1882,15 @@ static void bpf_output__printer(enum binary_printer_ops op,
1858 default: 1882 default:
1859 break; 1883 break;
1860 } 1884 }
1885
1886 return 0;
1861} 1887}
1862 1888
1863static void bpf_output__fprintf(struct trace *trace, 1889static void bpf_output__fprintf(struct trace *trace,
1864 struct perf_sample *sample) 1890 struct perf_sample *sample)
1865{ 1891{
1866 print_binary(sample->raw_data, sample->raw_size, 8, 1892 binary__fprintf(sample->raw_data, sample->raw_size, 8,
1867 bpf_output__printer, trace->output); 1893 bpf_output__printer, NULL, trace->output);
1868} 1894}
1869 1895
1870static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, 1896static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
@@ -2086,6 +2112,7 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
2086 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; 2112 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2087 else { 2113 else {
2088 pr_err("Neither raw_syscalls nor syscalls events exist.\n"); 2114 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2115 free(rec_argv);
2089 return -1; 2116 return -1;
2090 } 2117 }
2091 } 2118 }
@@ -2538,10 +2565,12 @@ static int trace__replay(struct trace *trace)
2538 const struct perf_evsel_str_handler handlers[] = { 2565 const struct perf_evsel_str_handler handlers[] = {
2539 { "probe:vfs_getname", trace__vfs_getname, }, 2566 { "probe:vfs_getname", trace__vfs_getname, },
2540 }; 2567 };
2541 struct perf_data_file file = { 2568 struct perf_data data = {
2542 .path = input_name, 2569 .file = {
2543 .mode = PERF_DATA_MODE_READ, 2570 .path = input_name,
2544 .force = trace->force, 2571 },
2572 .mode = PERF_DATA_MODE_READ,
2573 .force = trace->force,
2545 }; 2574 };
2546 struct perf_session *session; 2575 struct perf_session *session;
2547 struct perf_evsel *evsel; 2576 struct perf_evsel *evsel;
@@ -2564,7 +2593,7 @@ static int trace__replay(struct trace *trace)
2564 /* add tid to output */ 2593 /* add tid to output */
2565 trace->multiple_threads = true; 2594 trace->multiple_threads = true;
2566 2595
2567 session = perf_session__new(&file, false, &trace->tool); 2596 session = perf_session__new(&data, false, &trace->tool);
2568 if (session == NULL) 2597 if (session == NULL)
2569 return -1; 2598 return -1;
2570 2599
@@ -2740,20 +2769,23 @@ DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_event
2740 2769
2741static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2770static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2742{ 2771{
2743 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2744 size_t printed = trace__fprintf_threads_header(fp); 2772 size_t printed = trace__fprintf_threads_header(fp);
2745 struct rb_node *nd; 2773 struct rb_node *nd;
2774 int i;
2746 2775
2747 if (threads == NULL) { 2776 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
2748 fprintf(fp, "%s", "Error sorting output by nr_events!\n"); 2777 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
2749 return 0;
2750 }
2751 2778
2752 resort_rb__for_each_entry(nd, threads) 2779 if (threads == NULL) {
2753 printed += trace__fprintf_thread(fp, threads_entry->thread, trace); 2780 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2781 return 0;
2782 }
2754 2783
2755 resort_rb__delete(threads); 2784 resort_rb__for_each_entry(nd, threads)
2785 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2756 2786
2787 resort_rb__delete(threads);
2788 }
2757 return printed; 2789 return printed;
2758} 2790}
2759 2791
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 50cd6228f506..77406d25e521 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -5,8 +5,10 @@ HEADERS='
5include/uapi/drm/drm.h 5include/uapi/drm/drm.h
6include/uapi/drm/i915_drm.h 6include/uapi/drm/i915_drm.h
7include/uapi/linux/fcntl.h 7include/uapi/linux/fcntl.h
8include/uapi/linux/kcmp.h
8include/uapi/linux/kvm.h 9include/uapi/linux/kvm.h
9include/uapi/linux/perf_event.h 10include/uapi/linux/perf_event.h
11include/uapi/linux/prctl.h
10include/uapi/linux/sched.h 12include/uapi/linux/sched.h
11include/uapi/linux/stat.h 13include/uapi/linux/stat.h
12include/uapi/linux/vhost.h 14include/uapi/linux/vhost.h
@@ -58,6 +60,11 @@ check () {
58} 60}
59 61
60 62
63# Check if we have the kernel headers (tools/perf/../../include), else
64# we're probably on a detached tarball, so no point in trying to check
65# differences.
66test -d ../../include || exit 0
67
61# simple diff check 68# simple diff check
62for i in $HEADERS; do 69for i in $HEADERS; do
63 check $i -B 70 check $i -B
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index f75f3dec7485..2357f4ccc9c7 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -66,6 +66,7 @@ struct record_opts {
66 unsigned int user_freq; 66 unsigned int user_freq;
67 u64 branch_stack; 67 u64 branch_stack;
68 u64 sample_intr_regs; 68 u64 sample_intr_regs;
69 u64 sample_user_regs;
69 u64 default_interval; 70 u64 default_interval;
70 u64 user_interval; 71 u64 user_interval;
71 size_t auxtrace_snapshot_size; 72 size_t auxtrace_snapshot_size;
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
new file mode 100644
index 000000000000..00bfdb5c5acb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -0,0 +1,164 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15 "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23 "MetricGroup": "DSB; Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
64 "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)",
65 "MetricGroup": "Unknown_Branches",
66 "MetricName": "BAClear_Cost"
67 },
68 {
69 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
70 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
71 "MetricGroup": "SMT",
72 "MetricName": "CORE_CLKS"
73 },
74 {
75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
77 "MetricGroup": "Memory_Bound;Memory_Lat",
78 "MetricName": "Load_Miss_Real_Latency"
79 },
80 {
81 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
82 "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
83 "MetricGroup": "Memory_Bound;Memory_BW",
84 "MetricName": "MLP"
85 },
86 {
87 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
88 "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
89 "MetricGroup": "TLB",
90 "MetricName": "Page_Walks_Utilization"
91 },
92 {
93 "BriefDescription": "Average CPU Utilization",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
95 "MetricGroup": "Summary",
96 "MetricName": "CPU_Utilization"
97 },
98 {
99 "BriefDescription": "Giga Floating Point Operations Per Second",
100 "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
101 "MetricGroup": "FLOPS;Summary",
102 "MetricName": "GFLOPs"
103 },
104 {
105 "BriefDescription": "Average Frequency Utilization relative nominal frequency",
106 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
107 "MetricGroup": "Power",
108 "MetricName": "Turbo_Utilization"
109 },
110 {
111 "BriefDescription": "Fraction of cycles where both hardware threads were active",
112 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
113 "MetricGroup": "SMT;Summary",
114 "MetricName": "SMT_2T_Utilization"
115 },
116 {
117 "BriefDescription": "Fraction of cycles spent in Kernel mode",
118 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
119 "MetricGroup": "Summary",
120 "MetricName": "Kernel_Utilization"
121 },
122 {
123 "BriefDescription": "C3 residency percent per core",
124 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C3_Core_Residency"
127 },
128 {
129 "BriefDescription": "C6 residency percent per core",
130 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C6_Core_Residency"
133 },
134 {
135 "BriefDescription": "C7 residency percent per core",
136 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C7_Core_Residency"
139 },
140 {
141 "BriefDescription": "C2 residency percent per package",
142 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
143 "MetricGroup": "Power",
144 "MetricName": "C2_Pkg_Residency"
145 },
146 {
147 "BriefDescription": "C3 residency percent per package",
148 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
149 "MetricGroup": "Power",
150 "MetricName": "C3_Pkg_Residency"
151 },
152 {
153 "BriefDescription": "C6 residency percent per package",
154 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
155 "MetricGroup": "Power",
156 "MetricName": "C6_Pkg_Residency"
157 },
158 {
159 "BriefDescription": "C7 residency percent per package",
160 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
161 "MetricGroup": "Power",
162 "MetricName": "C7_Pkg_Residency"
163 }
164]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
new file mode 100644
index 000000000000..49c5f123d811
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -0,0 +1,164 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15 "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23 "MetricGroup": "DSB; Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
64 "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END",
65 "MetricGroup": "Unknown_Branches",
66 "MetricName": "BAClear_Cost"
67 },
68 {
69 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
70 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
71 "MetricGroup": "SMT",
72 "MetricName": "CORE_CLKS"
73 },
74 {
75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
77 "MetricGroup": "Memory_Bound;Memory_Lat",
78 "MetricName": "Load_Miss_Real_Latency"
79 },
80 {
81 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
82 "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
83 "MetricGroup": "Memory_Bound;Memory_BW",
84 "MetricName": "MLP"
85 },
86 {
87 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
88 "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
89 "MetricGroup": "TLB",
90 "MetricName": "Page_Walks_Utilization"
91 },
92 {
93 "BriefDescription": "Average CPU Utilization",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
95 "MetricGroup": "Summary",
96 "MetricName": "CPU_Utilization"
97 },
98 {
99 "BriefDescription": "Giga Floating Point Operations Per Second",
100 "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
101 "MetricGroup": "FLOPS;Summary",
102 "MetricName": "GFLOPs"
103 },
104 {
105 "BriefDescription": "Average Frequency Utilization relative nominal frequency",
106 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
107 "MetricGroup": "Power",
108 "MetricName": "Turbo_Utilization"
109 },
110 {
111 "BriefDescription": "Fraction of cycles where both hardware threads were active",
112 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
113 "MetricGroup": "SMT;Summary",
114 "MetricName": "SMT_2T_Utilization"
115 },
116 {
117 "BriefDescription": "Fraction of cycles spent in Kernel mode",
118 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
119 "MetricGroup": "Summary",
120 "MetricName": "Kernel_Utilization"
121 },
122 {
123 "BriefDescription": "C3 residency percent per core",
124 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C3_Core_Residency"
127 },
128 {
129 "BriefDescription": "C6 residency percent per core",
130 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C6_Core_Residency"
133 },
134 {
135 "BriefDescription": "C7 residency percent per core",
136 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C7_Core_Residency"
139 },
140 {
141 "BriefDescription": "C2 residency percent per package",
142 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
143 "MetricGroup": "Power",
144 "MetricName": "C2_Pkg_Residency"
145 },
146 {
147 "BriefDescription": "C3 residency percent per package",
148 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
149 "MetricGroup": "Power",
150 "MetricName": "C3_Pkg_Residency"
151 },
152 {
153 "BriefDescription": "C6 residency percent per package",
154 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
155 "MetricGroup": "Power",
156 "MetricName": "C6_Pkg_Residency"
157 },
158 {
159 "BriefDescription": "C7 residency percent per package",
160 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
161 "MetricGroup": "Power",
162 "MetricName": "C7_Pkg_Residency"
163 }
164]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
new file mode 100644
index 000000000000..5a7f1ec24200
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -0,0 +1,164 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15 "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23 "MetricGroup": "DSB;Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
64 "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)",
65 "MetricGroup": "Unknown_Branches",
66 "MetricName": "BAClear_Cost"
67 },
68 {
69 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
70 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
71 "MetricGroup": "SMT",
72 "MetricName": "CORE_CLKS"
73 },
74 {
75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
77 "MetricGroup": "Memory_Bound;Memory_Lat",
78 "MetricName": "Load_Miss_Real_Latency"
79 },
80 {
81 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
82 "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
83 "MetricGroup": "Memory_Bound;Memory_BW",
84 "MetricName": "MLP"
85 },
86 {
87 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
88 "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles))",
89 "MetricGroup": "TLB",
90 "MetricName": "Page_Walks_Utilization"
91 },
92 {
93 "BriefDescription": "Average CPU Utilization",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
95 "MetricGroup": "Summary",
96 "MetricName": "CPU_Utilization"
97 },
98 {
99 "BriefDescription": "Giga Floating Point Operations Per Second",
100 "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
101 "MetricGroup": "FLOPS;Summary",
102 "MetricName": "GFLOPs"
103 },
104 {
105 "BriefDescription": "Average Frequency Utilization relative nominal frequency",
106 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
107 "MetricGroup": "Power",
108 "MetricName": "Turbo_Utilization"
109 },
110 {
111 "BriefDescription": "Fraction of cycles where both hardware threads were active",
112 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
113 "MetricGroup": "SMT;Summary",
114 "MetricName": "SMT_2T_Utilization"
115 },
116 {
117 "BriefDescription": "Fraction of cycles spent in Kernel mode",
118 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
119 "MetricGroup": "Summary",
120 "MetricName": "Kernel_Utilization"
121 },
122 {
123 "BriefDescription": "C3 residency percent per core",
124 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C3_Core_Residency"
127 },
128 {
129 "BriefDescription": "C6 residency percent per core",
130 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C6_Core_Residency"
133 },
134 {
135 "BriefDescription": "C7 residency percent per core",
136 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C7_Core_Residency"
139 },
140 {
141 "BriefDescription": "C2 residency percent per package",
142 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
143 "MetricGroup": "Power",
144 "MetricName": "C2_Pkg_Residency"
145 },
146 {
147 "BriefDescription": "C3 residency percent per package",
148 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
149 "MetricGroup": "Power",
150 "MetricName": "C3_Pkg_Residency"
151 },
152 {
153 "BriefDescription": "C6 residency percent per package",
154 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
155 "MetricGroup": "Power",
156 "MetricName": "C6_Pkg_Residency"
157 },
158 {
159 "BriefDescription": "C7 residency percent per package",
160 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
161 "MetricGroup": "Power",
162 "MetricName": "C7_Pkg_Residency"
163 }
164]
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json b/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json
new file mode 100644
index 000000000000..b4791b443a66
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json
@@ -0,0 +1,1453 @@
1[
2 {
3 "CollectPEBSRecord": "1",
4 "PublicDescription": "Counts memory requests originating from the core that miss in the L2 cache.",
5 "EventCode": "0x2E",
6 "Counter": "0,1,2,3",
7 "UMask": "0x41",
8 "PEBScounters": "0,1,2,3",
9 "EventName": "LONGEST_LAT_CACHE.MISS",
10 "PDIR_COUNTER": "na",
11 "SampleAfterValue": "200003",
12 "BriefDescription": "L2 cache request misses"
13 },
14 {
15 "CollectPEBSRecord": "1",
16 "PublicDescription": "Counts memory requests originating from the core that reference a cache line in the L2 cache.",
17 "EventCode": "0x2E",
18 "Counter": "0,1,2,3",
19 "UMask": "0x4f",
20 "PEBScounters": "0,1,2,3",
21 "EventName": "LONGEST_LAT_CACHE.REFERENCE",
22 "PDIR_COUNTER": "na",
23 "SampleAfterValue": "200003",
24 "BriefDescription": "L2 cache requests"
25 },
26 {
27 "CollectPEBSRecord": "1",
28 "PublicDescription": "Counts the number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the intra-die interconnect (IDI) fabric. The XQ may reject transactions from the L2Q (non-cacheable requests), L2 misses and L2 write-back victims.",
29 "EventCode": "0x30",
30 "Counter": "0,1,2,3",
31 "UMask": "0x0",
32 "PEBScounters": "0,1,2,3",
33 "EventName": "L2_REJECT_XQ.ALL",
34 "PDIR_COUNTER": "na",
35 "SampleAfterValue": "200003",
36 "BriefDescription": "Requests rejected by the XQ"
37 },
38 {
39 "CollectPEBSRecord": "1",
40 "PublicDescription": "Counts the number of demand and L1 prefetcher requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. The L2Q may also reject transactions from a core to insure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.",
41 "EventCode": "0x31",
42 "Counter": "0,1,2,3",
43 "UMask": "0x0",
44 "PEBScounters": "0,1,2,3",
45 "EventName": "CORE_REJECT_L2Q.ALL",
46 "PDIR_COUNTER": "na",
47 "SampleAfterValue": "200003",
48 "BriefDescription": "Requests rejected by the L2Q"
49 },
50 {
51 "CollectPEBSRecord": "1",
52 "PublicDescription": "Counts when a modified (dirty) cache line is evicted from the data L1 cache and needs to be written back to memory. No count will occur if the evicted line is clean, and hence does not require a writeback.",
53 "EventCode": "0x51",
54 "Counter": "0,1,2,3",
55 "UMask": "0x1",
56 "PEBScounters": "0,1,2,3",
57 "EventName": "DL1.REPLACEMENT",
58 "PDIR_COUNTER": "na",
59 "SampleAfterValue": "200003",
60 "BriefDescription": "L1 Cache evictions for dirty data"
61 },
62 {
63 "CollectPEBSRecord": "1",
64 "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.",
65 "EventCode": "0x86",
66 "Counter": "0,1,2,3",
67 "UMask": "0x2",
68 "PEBScounters": "0,1,2,3",
69 "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES",
70 "PDIR_COUNTER": "na",
71 "SampleAfterValue": "200003",
72 "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss."
73 },
74 {
75 "CollectPEBSRecord": "1",
76 "EventCode": "0xB7",
77 "Counter": "0,1,2,3",
78 "UMask": "0x1",
79 "PEBScounters": "0,1,2,3",
80 "EventName": "OFFCORE_RESPONSE",
81 "PDIR_COUNTER": "na",
82 "SampleAfterValue": "100007",
83 "BriefDescription": "Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)"
84 },
85 {
86 "PEBS": "2",
87 "CollectPEBSRecord": "2",
88 "PublicDescription": "Counts locked memory uops retired. This includes regular locks and bus locks. (To specifically count bus locks only, see the Offcore response event.) A locked access is one with a lock prefix, or an exchange to memory. See the SDM for a complete description of which memory load accesses are locks.",
89 "EventCode": "0xD0",
90 "Counter": "0,1,2,3",
91 "UMask": "0x21",
92 "PEBScounters": "0,1,2,3",
93 "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
94 "SampleAfterValue": "200003",
95 "BriefDescription": "Locked load uops retired (Precise event capable)"
96 },
97 {
98 "PEBS": "2",
99 "CollectPEBSRecord": "2",
100 "PublicDescription": "Counts load uops retired where the data requested spans a 64 byte cache line boundary.",
101 "EventCode": "0xD0",
102 "Counter": "0,1,2,3",
103 "UMask": "0x41",
104 "PEBScounters": "0,1,2,3",
105 "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
106 "SampleAfterValue": "200003",
107 "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)"
108 },
109 {
110 "PEBS": "2",
111 "CollectPEBSRecord": "2",
112 "PublicDescription": "Counts store uops retired where the data requested spans a 64 byte cache line boundary.",
113 "EventCode": "0xD0",
114 "Counter": "0,1,2,3",
115 "UMask": "0x42",
116 "PEBScounters": "0,1,2,3",
117 "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
118 "SampleAfterValue": "200003",
119 "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)"
120 },
121 {
122 "PEBS": "2",
123 "CollectPEBSRecord": "2",
124 "PublicDescription": "Counts memory uops retired where the data requested spans a 64 byte cache line boundary.",
125 "EventCode": "0xD0",
126 "Counter": "0,1,2,3",
127 "UMask": "0x43",
128 "PEBScounters": "0,1,2,3",
129 "EventName": "MEM_UOPS_RETIRED.SPLIT",
130 "SampleAfterValue": "200003",
131 "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)"
132 },
133 {
134 "PEBS": "2",
135 "CollectPEBSRecord": "2",
136 "PublicDescription": "Counts the number of load uops retired.",
137 "EventCode": "0xD0",
138 "Counter": "0,1,2,3",
139 "UMask": "0x81",
140 "PEBScounters": "0,1,2,3",
141 "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
142 "SampleAfterValue": "200003",
143 "BriefDescription": "Load uops retired (Precise event capable)"
144 },
145 {
146 "PEBS": "2",
147 "CollectPEBSRecord": "2",
148 "PublicDescription": "Counts the number of store uops retired.",
149 "EventCode": "0xD0",
150 "Counter": "0,1,2,3",
151 "UMask": "0x82",
152 "PEBScounters": "0,1,2,3",
153 "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
154 "SampleAfterValue": "200003",
155 "BriefDescription": "Store uops retired (Precise event capable)"
156 },
157 {
158 "PEBS": "2",
159 "CollectPEBSRecord": "2",
160 "PublicDescription": "Counts the number of memory uops retired that are either a load or a store or both.",
161 "EventCode": "0xD0",
162 "Counter": "0,1,2,3",
163 "UMask": "0x83",
164 "PEBScounters": "0,1,2,3",
165 "EventName": "MEM_UOPS_RETIRED.ALL",
166 "SampleAfterValue": "200003",
167 "BriefDescription": "Memory uops retired (Precise event capable)"
168 },
169 {
170 "PEBS": "2",
171 "CollectPEBSRecord": "2",
172 "PublicDescription": "Counts load uops retired that hit the L1 data cache.",
173 "EventCode": "0xD1",
174 "Counter": "0,1,2,3",
175 "UMask": "0x1",
176 "PEBScounters": "0,1,2,3",
177 "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
178 "SampleAfterValue": "200003",
179 "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)"
180 },
181 {
182 "PEBS": "2",
183 "CollectPEBSRecord": "2",
184 "PublicDescription": "Counts load uops retired that hit in the L2 cache.",
185 "EventCode": "0xD1",
186 "Counter": "0,1,2,3",
187 "UMask": "0x2",
188 "PEBScounters": "0,1,2,3",
189 "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
190 "SampleAfterValue": "200003",
191 "BriefDescription": "Load uops retired that hit L2 (Precise event capable)"
192 },
193 {
194 "PEBS": "2",
195 "CollectPEBSRecord": "2",
196 "PublicDescription": "Counts load uops retired that miss the L1 data cache.",
197 "EventCode": "0xD1",
198 "Counter": "0,1,2,3",
199 "UMask": "0x8",
200 "PEBScounters": "0,1,2,3",
201 "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
202 "SampleAfterValue": "200003",
203 "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)"
204 },
205 {
206 "PEBS": "2",
207 "CollectPEBSRecord": "2",
208 "PublicDescription": "Counts load uops retired that miss in the L2 cache.",
209 "EventCode": "0xD1",
210 "Counter": "0,1,2,3",
211 "UMask": "0x10",
212 "PEBScounters": "0,1,2,3",
213 "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
214 "SampleAfterValue": "200003",
215 "BriefDescription": "Load uops retired that missed L2 (Precise event capable)"
216 },
217 {
218 "PEBS": "2",
219 "CollectPEBSRecord": "2",
220 "PublicDescription": "Counts load uops retired where the cache line containing the data was in the modified state of another core's or module's cache (HITM). More specifically, this means that when the load address was checked by other caching agents (typically another processor) in the system, one of those caching agents indicated that they had a dirty copy of the data. Loads that obtain a HITM response incur greater latency than is typical for a load. In addition, since HITM indicates that some other processor had this data in its cache, it implies that the data was shared between processors, or potentially was a lock or semaphore value. This event is useful for locating sharing, false sharing, and contended locks.",
221 "EventCode": "0xD1",
222 "Counter": "0,1,2,3",
223 "UMask": "0x20",
224 "PEBScounters": "0,1,2,3",
225 "EventName": "MEM_LOAD_UOPS_RETIRED.HITM",
226 "SampleAfterValue": "200003",
227 "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)"
228 },
229 {
230 "PEBS": "2",
231 "CollectPEBSRecord": "2",
232 "PublicDescription": "Counts memory load uops retired where the data is retrieved from the WCB (or fill buffer), indicating that the load found its data while that data was in the process of being brought into the L1 cache. Typically a load will receive this indication when some other load or prefetch missed the L1 cache and was in the process of retrieving the cache line containing the data, but that process had not yet finished (and written the data back to the cache). For example, consider load X and Y, both referencing the same cache line that is not in the L1 cache. If load X misses cache first, it obtains a WCB (or fill buffer) and begins the process of requesting the data. When load Y requests the data, it will either hit the WCB, or the L1 cache, depending on exactly what time the request to Y occurs.",
233 "EventCode": "0xD1",
234 "Counter": "0,1,2,3",
235 "UMask": "0x40",
236 "PEBScounters": "0,1,2,3",
237 "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT",
238 "SampleAfterValue": "200003",
239 "BriefDescription": "Loads retired that hit WCB (Precise event capable)"
240 },
241 {
242 "PEBS": "2",
243 "CollectPEBSRecord": "2",
244 "PublicDescription": "Counts memory load uops retired where the data is retrieved from DRAM. Event is counted at retirement, so the speculative loads are ignored. A memory load can hit (or miss) the L1 cache, hit (or miss) the L2 cache, hit DRAM, hit in the WCB or receive a HITM response.",
245 "EventCode": "0xD1",
246 "Counter": "0,1,2,3",
247 "UMask": "0x80",
248 "PEBScounters": "0,1,2,3",
249 "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
250 "SampleAfterValue": "200003",
251 "BriefDescription": "Loads retired that came from DRAM (Precise event capable)"
252 },
253 {
254 "CollectPEBSRecord": "1",
255 "PublicDescription": "Counts demand cacheable data reads of full cache lines have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
256 "EventCode": "0xB7",
257 "MSRValue": "0x0000010001",
258 "Counter": "0,1,2,3",
259 "UMask": "0x1",
260 "PEBScounters": "0,1,2,3",
261 "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
262 "PDIR_COUNTER": "na",
263 "MSRIndex": "0x1a6, 0x1a7",
264 "SampleAfterValue": "100007",
265 "BriefDescription": "Counts demand cacheable data reads of full cache lines have any transaction responses from the uncore subsystem.",
266 "Offcore": "1"
267 },
268 {
269 "CollectPEBSRecord": "1",
270 "PublicDescription": "Counts demand cacheable data reads of full cache lines hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
271 "EventCode": "0xB7",
272 "MSRValue": "0x0000040001",
273 "Counter": "0,1,2,3",
274 "UMask": "0x1",
275 "PEBScounters": "0,1,2,3",
276 "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT",
277 "PDIR_COUNTER": "na",
278 "MSRIndex": "0x1a6, 0x1a7",
279 "SampleAfterValue": "100007",
280 "BriefDescription": "Counts demand cacheable data reads of full cache lines hit the L2 cache.",
281 "Offcore": "1"
282 },
283 {
284 "CollectPEBSRecord": "1",
285 "PublicDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
286 "EventCode": "0xB7",
287 "MSRValue": "0x0200000001",
288 "Counter": "0,1,2,3",
289 "UMask": "0x1",
290 "PEBScounters": "0,1,2,3",
291 "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
292 "PDIR_COUNTER": "na",
293 "MSRIndex": "0x1a6, 0x1a7",
294 "SampleAfterValue": "100007",
295 "BriefDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module.",
296 "Offcore": "1"
297 },
298 {
299 "CollectPEBSRecord": "1",
300 "PublicDescription": "Counts demand cacheable data reads of full cache lines miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
301 "EventCode": "0xB7",
302 "MSRValue": "0x1000000001",
303 "Counter": "0,1,2,3",
304 "UMask": "0x1",
305 "PEBScounters": "0,1,2,3",
306 "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE",
307 "PDIR_COUNTER": "na",
308 "MSRIndex": "0x1a6, 0x1a7",
309 "SampleAfterValue": "100007",
310 "BriefDescription": "Counts demand cacheable data reads of full cache lines miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
311 "Offcore": "1"
312 },
313 {
314 "CollectPEBSRecord": "1",
315 "PublicDescription": "Counts demand cacheable data reads of full cache lines outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
316 "EventCode": "0xB7",
317 "MSRValue": "0x4000000001",
318 "Counter": "0,1,2,3",
319 "UMask": "0x1",
320 "PEBScounters": "0,1,2,3",
321 "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING",
322 "PDIR_COUNTER": "na",
323 "MSRIndex": "0x1a6",
324 "SampleAfterValue": "100007",
325 "BriefDescription": "Counts demand cacheable data reads of full cache lines outstanding, per cycle, from the time of the L2 miss to when any response is received.",
326 "Offcore": "1"
327 },
328 {
329 "CollectPEBSRecord": "1",
330 "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
331 "EventCode": "0xB7",
332 "MSRValue": "0x0000010002",
333 "Counter": "0,1,2,3",
334 "UMask": "0x1",
335 "PEBScounters": "0,1,2,3",
336 "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
337 "PDIR_COUNTER": "na",
338 "MSRIndex": "0x1a6, 0x1a7",
339 "SampleAfterValue": "100007",
340 "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line have any transaction responses from the uncore subsystem.",
341 "Offcore": "1"
342 },
343 {
344 "CollectPEBSRecord": "1",
345 "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
346 "EventCode": "0xB7",
347 "MSRValue": "0x0000040002",
348 "Counter": "0,1,2,3",
349 "UMask": "0x1",
350 "PEBScounters": "0,1,2,3",
351 "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT",
352 "PDIR_COUNTER": "na",
353 "MSRIndex": "0x1a6, 0x1a7",
354 "SampleAfterValue": "100007",
355 "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line hit the L2 cache.",
356 "Offcore": "1"
357 },
358 {
359 "CollectPEBSRecord": "1",
360 "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
361 "EventCode": "0xB7",
362 "MSRValue": "0x0200000002",
363 "Counter": "0,1,2,3",
364 "UMask": "0x1",
365 "PEBScounters": "0,1,2,3",
366 "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
367 "PDIR_COUNTER": "na",
368 "MSRIndex": "0x1a6, 0x1a7",
369 "SampleAfterValue": "100007",
370 "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module.",
371 "Offcore": "1"
372 },
373 {
374 "CollectPEBSRecord": "1",
375 "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
376 "EventCode": "0xB7",
377 "MSRValue": "0x1000000002",
378 "Counter": "0,1,2,3",
379 "UMask": "0x1",
380 "PEBScounters": "0,1,2,3",
381 "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE",
382 "PDIR_COUNTER": "na",
383 "MSRIndex": "0x1a6, 0x1a7",
384 "SampleAfterValue": "100007",
385 "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
386 "Offcore": "1"
387 },
388 {
389 "CollectPEBSRecord": "1",
390 "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
391 "EventCode": "0xB7",
392 "MSRValue": "0x4000000002",
393 "Counter": "0,1,2,3",
394 "UMask": "0x1",
395 "PEBScounters": "0,1,2,3",
396 "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING",
397 "PDIR_COUNTER": "na",
398 "MSRIndex": "0x1a6",
399 "SampleAfterValue": "100007",
400 "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line outstanding, per cycle, from the time of the L2 miss to when any response is received.",
401 "Offcore": "1"
402 },
403 {
404 "CollectPEBSRecord": "1",
405 "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
406 "EventCode": "0xB7",
407 "MSRValue": "0x0000010004",
408 "Counter": "0,1,2,3",
409 "UMask": "0x1",
410 "PEBScounters": "0,1,2,3",
411 "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
412 "PDIR_COUNTER": "na",
413 "MSRIndex": "0x1a6, 0x1a7",
414 "SampleAfterValue": "100007",
415 "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache have any transaction responses from the uncore subsystem.",
416 "Offcore": "1"
417 },
418 {
419 "CollectPEBSRecord": "1",
420 "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
421 "EventCode": "0xB7",
422 "MSRValue": "0x0000040004",
423 "Counter": "0,1,2,3",
424 "UMask": "0x1",
425 "PEBScounters": "0,1,2,3",
426 "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT",
427 "PDIR_COUNTER": "na",
428 "MSRIndex": "0x1a6, 0x1a7",
429 "SampleAfterValue": "100007",
430 "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache hit the L2 cache.",
431 "Offcore": "1"
432 },
433 {
434 "CollectPEBSRecord": "1",
435 "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
436 "EventCode": "0xB7",
437 "MSRValue": "0x0200000004",
438 "Counter": "0,1,2,3",
439 "UMask": "0x1",
440 "PEBScounters": "0,1,2,3",
441 "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
442 "PDIR_COUNTER": "na",
443 "MSRIndex": "0x1a6, 0x1a7",
444 "SampleAfterValue": "100007",
445 "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module.",
446 "Offcore": "1"
447 },
448 {
449 "CollectPEBSRecord": "1",
450 "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
451 "EventCode": "0xB7",
452 "MSRValue": "0x1000000004",
453 "Counter": "0,1,2,3",
454 "UMask": "0x1",
455 "PEBScounters": "0,1,2,3",
456 "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HITM_OTHER_CORE",
457 "PDIR_COUNTER": "na",
458 "MSRIndex": "0x1a6, 0x1a7",
459 "SampleAfterValue": "100007",
460 "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
461 "Offcore": "1"
462 },
463 {
464 "CollectPEBSRecord": "1",
465 "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
466 "EventCode": "0xB7",
467 "MSRValue": "0x4000000004",
468 "Counter": "0,1,2,3",
469 "UMask": "0x1",
470 "PEBScounters": "0,1,2,3",
471 "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING",
472 "PDIR_COUNTER": "na",
473 "MSRIndex": "0x1a6",
474 "SampleAfterValue": "100007",
475 "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache outstanding, per cycle, from the time of the L2 miss to when any response is received.",
476 "Offcore": "1"
477 },
478 {
479 "CollectPEBSRecord": "1",
480 "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
481 "EventCode": "0xB7",
482 "MSRValue": "0x0000010008",
483 "Counter": "0,1,2,3",
484 "UMask": "0x1",
485 "PEBScounters": "0,1,2,3",
486 "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
487 "PDIR_COUNTER": "na",
488 "MSRIndex": "0x1a6, 0x1a7",
489 "SampleAfterValue": "100007",
490 "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions have any transaction responses from the uncore subsystem.",
491 "Offcore": "1"
492 },
493 {
494 "CollectPEBSRecord": "1",
495 "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
496 "EventCode": "0xB7",
497 "MSRValue": "0x0000040008",
498 "Counter": "0,1,2,3",
499 "UMask": "0x1",
500 "PEBScounters": "0,1,2,3",
501 "EventName": "OFFCORE_RESPONSE.COREWB.L2_HIT",
502 "PDIR_COUNTER": "na",
503 "MSRIndex": "0x1a6, 0x1a7",
504 "SampleAfterValue": "100007",
505 "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions hit the L2 cache.",
506 "Offcore": "1"
507 },
508 {
509 "CollectPEBSRecord": "1",
510 "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
511 "EventCode": "0xB7",
512 "MSRValue": "0x0200000008",
513 "Counter": "0,1,2,3",
514 "UMask": "0x1",
515 "PEBScounters": "0,1,2,3",
516 "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
517 "PDIR_COUNTER": "na",
518 "MSRIndex": "0x1a6, 0x1a7",
519 "SampleAfterValue": "100007",
520         "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module.",
521 "Offcore": "1"
522 },
523 {
524 "CollectPEBSRecord": "1",
525 "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
526 "EventCode": "0xB7",
527 "MSRValue": "0x1000000008",
528 "Counter": "0,1,2,3",
529 "UMask": "0x1",
530 "PEBScounters": "0,1,2,3",
531 "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HITM_OTHER_CORE",
532 "PDIR_COUNTER": "na",
533 "MSRIndex": "0x1a6, 0x1a7",
534 "SampleAfterValue": "100007",
535 "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
536 "Offcore": "1"
537 },
538 {
539 "CollectPEBSRecord": "1",
540 "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
541 "EventCode": "0xB7",
542 "MSRValue": "0x4000000008",
543 "Counter": "0,1,2,3",
544 "UMask": "0x1",
545 "PEBScounters": "0,1,2,3",
546 "EventName": "OFFCORE_RESPONSE.COREWB.OUTSTANDING",
547 "PDIR_COUNTER": "na",
548 "MSRIndex": "0x1a6",
549 "SampleAfterValue": "100007",
550 "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions outstanding, per cycle, from the time of the L2 miss to when any response is received.",
551 "Offcore": "1"
552 },
553 {
554 "CollectPEBSRecord": "1",
555 "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
556 "EventCode": "0xB7",
557 "MSRValue": "0x0000010010",
558 "Counter": "0,1,2,3",
559 "UMask": "0x1",
560 "PEBScounters": "0,1,2,3",
561 "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
562 "PDIR_COUNTER": "na",
563 "MSRIndex": "0x1a6, 0x1a7",
564 "SampleAfterValue": "100007",
565 "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher have any transaction responses from the uncore subsystem.",
566 "Offcore": "1"
567 },
568 {
569 "CollectPEBSRecord": "1",
570 "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
571 "EventCode": "0xB7",
572 "MSRValue": "0x0000040010",
573 "Counter": "0,1,2,3",
574 "UMask": "0x1",
575 "PEBScounters": "0,1,2,3",
576 "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_HIT",
577 "PDIR_COUNTER": "na",
578 "MSRIndex": "0x1a6, 0x1a7",
579 "SampleAfterValue": "100007",
580 "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher hit the L2 cache.",
581 "Offcore": "1"
582 },
583 {
584 "CollectPEBSRecord": "1",
585 "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
586 "EventCode": "0xB7",
587 "MSRValue": "0x0200000010",
588 "Counter": "0,1,2,3",
589 "UMask": "0x1",
590 "PEBScounters": "0,1,2,3",
591 "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
592 "PDIR_COUNTER": "na",
593 "MSRIndex": "0x1a6, 0x1a7",
594 "SampleAfterValue": "100007",
595         "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module.",
596 "Offcore": "1"
597 },
598 {
599 "CollectPEBSRecord": "1",
600 "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
601 "EventCode": "0xB7",
602 "MSRValue": "0x1000000010",
603 "Counter": "0,1,2,3",
604 "UMask": "0x1",
605 "PEBScounters": "0,1,2,3",
606 "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE",
607 "PDIR_COUNTER": "na",
608 "MSRIndex": "0x1a6, 0x1a7",
609 "SampleAfterValue": "100007",
610 "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
611 "Offcore": "1"
612 },
613 {
614 "CollectPEBSRecord": "1",
615 "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
616 "EventCode": "0xB7",
617 "MSRValue": "0x4000000010",
618 "Counter": "0,1,2,3",
619 "UMask": "0x1",
620 "PEBScounters": "0,1,2,3",
621 "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.OUTSTANDING",
622 "PDIR_COUNTER": "na",
623 "MSRIndex": "0x1a6",
624 "SampleAfterValue": "100007",
625 "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.",
626 "Offcore": "1"
627 },
628 {
629 "CollectPEBSRecord": "1",
630 "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
631 "EventCode": "0xB7",
632 "MSRValue": "0x0000010020",
633 "Counter": "0,1,2,3",
634 "UMask": "0x1",
635 "PEBScounters": "0,1,2,3",
636 "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
637 "PDIR_COUNTER": "na",
638 "MSRIndex": "0x1a6, 0x1a7",
639 "SampleAfterValue": "100007",
640 "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher have any transaction responses from the uncore subsystem.",
641 "Offcore": "1"
642 },
643 {
644 "CollectPEBSRecord": "1",
645 "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
646 "EventCode": "0xB7",
647 "MSRValue": "0x0000040020",
648 "Counter": "0,1,2,3",
649 "UMask": "0x1",
650 "PEBScounters": "0,1,2,3",
651 "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT",
652 "PDIR_COUNTER": "na",
653 "MSRIndex": "0x1a6, 0x1a7",
654 "SampleAfterValue": "100007",
655 "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher hit the L2 cache.",
656 "Offcore": "1"
657 },
658 {
659 "CollectPEBSRecord": "1",
660 "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
661 "EventCode": "0xB7",
662 "MSRValue": "0x0200000020",
663 "Counter": "0,1,2,3",
664 "UMask": "0x1",
665 "PEBScounters": "0,1,2,3",
666 "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
667 "PDIR_COUNTER": "na",
668 "MSRIndex": "0x1a6, 0x1a7",
669 "SampleAfterValue": "100007",
670         "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module.",
671 "Offcore": "1"
672 },
673 {
674 "CollectPEBSRecord": "1",
675 "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
676 "EventCode": "0xB7",
677 "MSRValue": "0x1000000020",
678 "Counter": "0,1,2,3",
679 "UMask": "0x1",
680 "PEBScounters": "0,1,2,3",
681 "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE",
682 "PDIR_COUNTER": "na",
683 "MSRIndex": "0x1a6, 0x1a7",
684 "SampleAfterValue": "100007",
685 "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
686 "Offcore": "1"
687 },
688 {
689 "CollectPEBSRecord": "1",
690 "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
691 "EventCode": "0xB7",
692 "MSRValue": "0x4000000020",
693 "Counter": "0,1,2,3",
694 "UMask": "0x1",
695 "PEBScounters": "0,1,2,3",
696 "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.OUTSTANDING",
697 "PDIR_COUNTER": "na",
698 "MSRIndex": "0x1a6",
699 "SampleAfterValue": "100007",
700 "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.",
701 "Offcore": "1"
702 },
703 {
704 "CollectPEBSRecord": "1",
705 "PublicDescription": "Counts bus lock and split lock requests have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
706 "EventCode": "0xB7",
707 "MSRValue": "0x0000010400",
708 "Counter": "0,1,2,3",
709 "UMask": "0x1",
710 "PEBScounters": "0,1,2,3",
711 "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE",
712 "PDIR_COUNTER": "na",
713 "MSRIndex": "0x1a6, 0x1a7",
714 "SampleAfterValue": "100007",
715 "BriefDescription": "Counts bus lock and split lock requests have any transaction responses from the uncore subsystem.",
716 "Offcore": "1"
717 },
718 {
719 "CollectPEBSRecord": "1",
720 "PublicDescription": "Counts bus lock and split lock requests hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
721 "EventCode": "0xB7",
722 "MSRValue": "0x0000040400",
723 "Counter": "0,1,2,3",
724 "UMask": "0x1",
725 "PEBScounters": "0,1,2,3",
726 "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT",
727 "PDIR_COUNTER": "na",
728 "MSRIndex": "0x1a6, 0x1a7",
729 "SampleAfterValue": "100007",
730 "BriefDescription": "Counts bus lock and split lock requests hit the L2 cache.",
731 "Offcore": "1"
732 },
733 {
734 "CollectPEBSRecord": "1",
735 "PublicDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
736 "EventCode": "0xB7",
737 "MSRValue": "0x0200000400",
738 "Counter": "0,1,2,3",
739 "UMask": "0x1",
740 "PEBScounters": "0,1,2,3",
741 "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
742 "PDIR_COUNTER": "na",
743 "MSRIndex": "0x1a6, 0x1a7",
744 "SampleAfterValue": "100007",
745         "BriefDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module.",
746 "Offcore": "1"
747 },
748 {
749 "CollectPEBSRecord": "1",
750 "PublicDescription": "Counts bus lock and split lock requests miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
751 "EventCode": "0xB7",
752 "MSRValue": "0x1000000400",
753 "Counter": "0,1,2,3",
754 "UMask": "0x1",
755 "PEBScounters": "0,1,2,3",
756 "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HITM_OTHER_CORE",
757 "PDIR_COUNTER": "na",
758 "MSRIndex": "0x1a6, 0x1a7",
759 "SampleAfterValue": "100007",
760 "BriefDescription": "Counts bus lock and split lock requests miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
761 "Offcore": "1"
762 },
763 {
764 "CollectPEBSRecord": "1",
765 "PublicDescription": "Counts bus lock and split lock requests outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
766 "EventCode": "0xB7",
767 "MSRValue": "0x4000000400",
768 "Counter": "0,1,2,3",
769 "UMask": "0x1",
770 "PEBScounters": "0,1,2,3",
771 "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING",
772 "PDIR_COUNTER": "na",
773 "MSRIndex": "0x1a6",
774 "SampleAfterValue": "100007",
775 "BriefDescription": "Counts bus lock and split lock requests outstanding, per cycle, from the time of the L2 miss to when any response is received.",
776 "Offcore": "1"
777 },
778 {
779 "CollectPEBSRecord": "1",
780 "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
781 "EventCode": "0xB7",
782 "MSRValue": "0x0000010800",
783 "Counter": "0,1,2,3",
784 "UMask": "0x1",
785 "PEBScounters": "0,1,2,3",
786 "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE",
787 "PDIR_COUNTER": "na",
788 "MSRIndex": "0x1a6, 0x1a7",
789 "SampleAfterValue": "100007",
790 "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes have any transaction responses from the uncore subsystem.",
791 "Offcore": "1"
792 },
793 {
794 "CollectPEBSRecord": "1",
795 "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
796 "EventCode": "0xB7",
797 "MSRValue": "0x0000040800",
798 "Counter": "0,1,2,3",
799 "UMask": "0x1",
800 "PEBScounters": "0,1,2,3",
801 "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT",
802 "PDIR_COUNTER": "na",
803 "MSRIndex": "0x1a6, 0x1a7",
804 "SampleAfterValue": "100007",
805 "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes hit the L2 cache.",
806 "Offcore": "1"
807 },
808 {
809 "CollectPEBSRecord": "1",
810 "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
811 "EventCode": "0xB7",
812 "MSRValue": "0x0200000800",
813 "Counter": "0,1,2,3",
814 "UMask": "0x1",
815 "PEBScounters": "0,1,2,3",
816 "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
817 "PDIR_COUNTER": "na",
818 "MSRIndex": "0x1a6, 0x1a7",
819 "SampleAfterValue": "100007",
820         "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module.",
821 "Offcore": "1"
822 },
823 {
824 "CollectPEBSRecord": "1",
825 "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
826 "EventCode": "0xB7",
827 "MSRValue": "0x1000000800",
828 "Counter": "0,1,2,3",
829 "UMask": "0x1",
830 "PEBScounters": "0,1,2,3",
831 "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
832 "PDIR_COUNTER": "na",
833 "MSRIndex": "0x1a6, 0x1a7",
834 "SampleAfterValue": "100007",
835 "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
836 "Offcore": "1"
837 },
838 {
839 "CollectPEBSRecord": "1",
840 "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
841 "EventCode": "0xB7",
842 "MSRValue": "0x4000000800",
843 "Counter": "0,1,2,3",
844 "UMask": "0x1",
845 "PEBScounters": "0,1,2,3",
846 "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.OUTSTANDING",
847 "PDIR_COUNTER": "na",
848 "MSRIndex": "0x1a6",
849 "SampleAfterValue": "100007",
850 "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes outstanding, per cycle, from the time of the L2 miss to when any response is received.",
851 "Offcore": "1"
852 },
853 {
854 "CollectPEBSRecord": "1",
855 "PublicDescription": "Counts data cache lines requests by software prefetch instructions have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
856 "EventCode": "0xB7",
857 "MSRValue": "0x0000011000",
858 "Counter": "0,1,2,3",
859 "UMask": "0x1",
860 "PEBScounters": "0,1,2,3",
861 "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.ANY_RESPONSE",
862 "PDIR_COUNTER": "na",
863 "MSRIndex": "0x1a6, 0x1a7",
864 "SampleAfterValue": "100007",
865 "BriefDescription": "Counts data cache lines requests by software prefetch instructions have any transaction responses from the uncore subsystem.",
866 "Offcore": "1"
867 },
868 {
869 "CollectPEBSRecord": "1",
870 "PublicDescription": "Counts data cache lines requests by software prefetch instructions hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
871 "EventCode": "0xB7",
872 "MSRValue": "0x0000041000",
873 "Counter": "0,1,2,3",
874 "UMask": "0x1",
875 "PEBScounters": "0,1,2,3",
876 "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT",
877 "PDIR_COUNTER": "na",
878 "MSRIndex": "0x1a6, 0x1a7",
879 "SampleAfterValue": "100007",
880 "BriefDescription": "Counts data cache lines requests by software prefetch instructions hit the L2 cache.",
881 "Offcore": "1"
882 },
883 {
884 "CollectPEBSRecord": "1",
885 "PublicDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
886 "EventCode": "0xB7",
887 "MSRValue": "0x0200001000",
888 "Counter": "0,1,2,3",
889 "UMask": "0x1",
890 "PEBScounters": "0,1,2,3",
891 "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
892 "PDIR_COUNTER": "na",
893 "MSRIndex": "0x1a6, 0x1a7",
894 "SampleAfterValue": "100007",
895         "BriefDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module.",
896 "Offcore": "1"
897 },
898 {
899 "CollectPEBSRecord": "1",
900 "PublicDescription": "Counts data cache lines requests by software prefetch instructions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
901 "EventCode": "0xB7",
902 "MSRValue": "0x1000001000",
903 "Counter": "0,1,2,3",
904 "UMask": "0x1",
905 "PEBScounters": "0,1,2,3",
906 "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE",
907 "PDIR_COUNTER": "na",
908 "MSRIndex": "0x1a6, 0x1a7",
909 "SampleAfterValue": "100007",
910 "BriefDescription": "Counts data cache lines requests by software prefetch instructions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
911 "Offcore": "1"
912 },
913 {
914 "CollectPEBSRecord": "1",
915 "PublicDescription": "Counts data cache lines requests by software prefetch instructions outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
916 "EventCode": "0xB7",
917 "MSRValue": "0x4000001000",
918 "Counter": "0,1,2,3",
919 "UMask": "0x1",
920 "PEBScounters": "0,1,2,3",
921 "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.OUTSTANDING",
922 "PDIR_COUNTER": "na",
923 "MSRIndex": "0x1a6",
924 "SampleAfterValue": "100007",
925 "BriefDescription": "Counts data cache lines requests by software prefetch instructions outstanding, per cycle, from the time of the L2 miss to when any response is received.",
926 "Offcore": "1"
927 },
928 {
929 "CollectPEBSRecord": "1",
930 "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
931 "EventCode": "0xB7",
932 "MSRValue": "0x0000012000",
933 "Counter": "0,1,2,3",
934 "UMask": "0x1",
935 "PEBScounters": "0,1,2,3",
936 "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE",
937 "PDIR_COUNTER": "na",
938 "MSRIndex": "0x1a6, 0x1a7",
939 "SampleAfterValue": "100007",
940 "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher have any transaction responses from the uncore subsystem.",
941 "Offcore": "1"
942 },
943 {
944 "CollectPEBSRecord": "1",
945 "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
946 "EventCode": "0xB7",
947 "MSRValue": "0x0000042000",
948 "Counter": "0,1,2,3",
949 "UMask": "0x1",
950 "PEBScounters": "0,1,2,3",
951 "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT",
952 "PDIR_COUNTER": "na",
953 "MSRIndex": "0x1a6, 0x1a7",
954 "SampleAfterValue": "100007",
955 "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher hit the L2 cache.",
956 "Offcore": "1"
957 },
958 {
959 "CollectPEBSRecord": "1",
960 "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
961 "EventCode": "0xB7",
962 "MSRValue": "0x0200002000",
963 "Counter": "0,1,2,3",
964 "UMask": "0x1",
965 "PEBScounters": "0,1,2,3",
966 "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
967 "PDIR_COUNTER": "na",
968 "MSRIndex": "0x1a6, 0x1a7",
969 "SampleAfterValue": "100007",
970         "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module.",
971 "Offcore": "1"
972 },
973 {
974 "CollectPEBSRecord": "1",
975 "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
976 "EventCode": "0xB7",
977 "MSRValue": "0x1000002000",
978 "Counter": "0,1,2,3",
979 "UMask": "0x1",
980 "PEBScounters": "0,1,2,3",
981 "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE",
982 "PDIR_COUNTER": "na",
983 "MSRIndex": "0x1a6, 0x1a7",
984 "SampleAfterValue": "100007",
985 "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
986 "Offcore": "1"
987 },
988 {
989 "CollectPEBSRecord": "1",
990 "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
991 "EventCode": "0xB7",
992 "MSRValue": "0x4000002000",
993 "Counter": "0,1,2,3",
994 "UMask": "0x1",
995 "PEBScounters": "0,1,2,3",
996 "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING",
997 "PDIR_COUNTER": "na",
998 "MSRIndex": "0x1a6",
999 "SampleAfterValue": "100007",
1000 "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.",
1001 "Offcore": "1"
1002 },
1003 {
1004 "CollectPEBSRecord": "1",
1005 "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1006 "EventCode": "0xB7",
1007 "MSRValue": "0x0000014800",
1008 "Counter": "0,1,2,3",
1009 "UMask": "0x1",
1010 "PEBScounters": "0,1,2,3",
1011 "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
1012 "PDIR_COUNTER": "na",
1013 "MSRIndex": "0x1a6, 0x1a7",
1014 "SampleAfterValue": "100007",
1015 "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region have any transaction responses from the uncore subsystem.",
1016 "Offcore": "1"
1017 },
1018 {
1019 "CollectPEBSRecord": "1",
1020 "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1021 "EventCode": "0xB7",
1022 "MSRValue": "0x0000044800",
1023 "Counter": "0,1,2,3",
1024 "UMask": "0x1",
1025 "PEBScounters": "0,1,2,3",
1026 "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT",
1027 "PDIR_COUNTER": "na",
1028 "MSRIndex": "0x1a6, 0x1a7",
1029 "SampleAfterValue": "100007",
1030 "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region hit the L2 cache.",
1031 "Offcore": "1"
1032 },
1033 {
1034 "CollectPEBSRecord": "1",
1035 "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1036 "EventCode": "0xB7",
1037 "MSRValue": "0x0200004800",
1038 "Counter": "0,1,2,3",
1039 "UMask": "0x1",
1040 "PEBScounters": "0,1,2,3",
1041 "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
1042 "PDIR_COUNTER": "na",
1043 "MSRIndex": "0x1a6, 0x1a7",
1044 "SampleAfterValue": "100007",
1045         "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module.",
1046 "Offcore": "1"
1047 },
1048 {
1049 "CollectPEBSRecord": "1",
1050 "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1051 "EventCode": "0xB7",
1052 "MSRValue": "0x1000004800",
1053 "Counter": "0,1,2,3",
1054 "UMask": "0x1",
1055 "PEBScounters": "0,1,2,3",
1056 "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
1057 "PDIR_COUNTER": "na",
1058 "MSRIndex": "0x1a6, 0x1a7",
1059 "SampleAfterValue": "100007",
1060 "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
1061 "Offcore": "1"
1062 },
1063 {
1064 "CollectPEBSRecord": "1",
1065 "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1066 "EventCode": "0xB7",
1067 "MSRValue": "0x4000004800",
1068 "Counter": "0,1,2,3",
1069 "UMask": "0x1",
1070 "PEBScounters": "0,1,2,3",
1071 "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.OUTSTANDING",
1072 "PDIR_COUNTER": "na",
1073 "MSRIndex": "0x1a6",
1074 "SampleAfterValue": "100007",
1075 "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region outstanding, per cycle, from the time of the L2 miss to when any response is received.",
1076 "Offcore": "1"
1077 },
1078 {
1079 "CollectPEBSRecord": "1",
1080 "PublicDescription": "Counts requests to the uncore subsystem have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1081 "EventCode": "0xB7",
1082 "MSRValue": "0x0000018000",
1083 "Counter": "0,1,2,3",
1084 "UMask": "0x1",
1085 "PEBScounters": "0,1,2,3",
1086 "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE",
1087 "PDIR_COUNTER": "na",
1088 "MSRIndex": "0x1a6, 0x1a7",
1089 "SampleAfterValue": "100007",
1090 "BriefDescription": "Counts requests to the uncore subsystem have any transaction responses from the uncore subsystem.",
1091 "Offcore": "1"
1092 },
1093 {
1094 "CollectPEBSRecord": "1",
1095 "PublicDescription": "Counts requests to the uncore subsystem hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1096 "EventCode": "0xB7",
1097 "MSRValue": "0x0000048000",
1098 "Counter": "0,1,2,3",
1099 "UMask": "0x1",
1100 "PEBScounters": "0,1,2,3",
1101 "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT",
1102 "PDIR_COUNTER": "na",
1103 "MSRIndex": "0x1a6, 0x1a7",
1104 "SampleAfterValue": "100007",
1105 "BriefDescription": "Counts requests to the uncore subsystem hit the L2 cache.",
1106 "Offcore": "1"
1107 },
1108 {
1109 "CollectPEBSRecord": "1",
1110 "PublicDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1111 "EventCode": "0xB7",
1112 "MSRValue": "0x0200008000",
1113 "Counter": "0,1,2,3",
1114 "UMask": "0x1",
1115 "PEBScounters": "0,1,2,3",
1116 "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
1117 "PDIR_COUNTER": "na",
1118 "MSRIndex": "0x1a6, 0x1a7",
1119 "SampleAfterValue": "100007",
1120 "BriefDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module.",
1121 "Offcore": "1"
1122 },
1123 {
1124 "CollectPEBSRecord": "1",
1125 "PublicDescription": "Counts requests to the uncore subsystem miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1126 "EventCode": "0xB7",
1127 "MSRValue": "0x1000008000",
1128 "Counter": "0,1,2,3",
1129 "UMask": "0x1",
1130 "PEBScounters": "0,1,2,3",
1131 "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE",
1132 "PDIR_COUNTER": "na",
1133 "MSRIndex": "0x1a6, 0x1a7",
1134 "SampleAfterValue": "100007",
1135 "BriefDescription": "Counts requests to the uncore subsystem miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
1136 "Offcore": "1"
1137 },
1138 {
1139 "CollectPEBSRecord": "1",
1140 "PublicDescription": "Counts requests to the uncore subsystem outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1141 "EventCode": "0xB7",
1142 "MSRValue": "0x4000008000",
1143 "Counter": "0,1,2,3",
1144 "UMask": "0x1",
1145 "PEBScounters": "0,1,2,3",
1146 "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING",
1147 "PDIR_COUNTER": "na",
1148 "MSRIndex": "0x1a6",
1149 "SampleAfterValue": "100007",
1150 "BriefDescription": "Counts requests to the uncore subsystem outstanding, per cycle, from the time of the L2 miss to when any response is received.",
1151 "Offcore": "1"
1152 },
1153 {
1154 "CollectPEBSRecord": "1",
1155 "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1156 "EventCode": "0xB7",
1157 "MSRValue": "0x0000013010",
1158 "Counter": "0,1,2,3",
1159 "UMask": "0x1",
1160 "PEBScounters": "0,1,2,3",
1161 "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.ANY_RESPONSE",
1162 "PDIR_COUNTER": "na",
1163 "MSRIndex": "0x1a6, 0x1a7",
1164 "SampleAfterValue": "100007",
1165 "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers have any transaction responses from the uncore subsystem.",
1166 "Offcore": "1"
1167 },
1168 {
1169 "CollectPEBSRecord": "1",
1170 "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1171 "EventCode": "0xB7",
1172 "MSRValue": "0x0000043010",
1173 "Counter": "0,1,2,3",
1174 "UMask": "0x1",
1175 "PEBScounters": "0,1,2,3",
1176 "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT",
1177 "PDIR_COUNTER": "na",
1178 "MSRIndex": "0x1a6, 0x1a7",
1179 "SampleAfterValue": "100007",
1180 "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers hit the L2 cache.",
1181 "Offcore": "1"
1182 },
1183 {
1184 "CollectPEBSRecord": "1",
1185 "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1186 "EventCode": "0xB7",
1187 "MSRValue": "0x0200003010",
1188 "Counter": "0,1,2,3",
1189 "UMask": "0x1",
1190 "PEBScounters": "0,1,2,3",
1191 "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
1192 "PDIR_COUNTER": "na",
1193 "MSRIndex": "0x1a6, 0x1a7",
1194 "SampleAfterValue": "100007",
1195 "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module.",
1196 "Offcore": "1"
1197 },
1198 {
1199 "CollectPEBSRecord": "1",
1200 "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1201 "EventCode": "0xB7",
1202 "MSRValue": "0x1000003010",
1203 "Counter": "0,1,2,3",
1204 "UMask": "0x1",
1205 "PEBScounters": "0,1,2,3",
1206 "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HITM_OTHER_CORE",
1207 "PDIR_COUNTER": "na",
1208 "MSRIndex": "0x1a6, 0x1a7",
1209 "SampleAfterValue": "100007",
1210 "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
1211 "Offcore": "1"
1212 },
1213 {
1214 "CollectPEBSRecord": "1",
1215 "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1216 "EventCode": "0xB7",
1217 "MSRValue": "0x4000003010",
1218 "Counter": "0,1,2,3",
1219 "UMask": "0x1",
1220 "PEBScounters": "0,1,2,3",
1221 "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.OUTSTANDING",
1222 "PDIR_COUNTER": "na",
1223 "MSRIndex": "0x1a6",
1224 "SampleAfterValue": "100007",
1225 "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers outstanding, per cycle, from the time of the L2 miss to when any response is received.",
1226 "Offcore": "1"
1227 },
1228 {
1229 "CollectPEBSRecord": "1",
1230 "PublicDescription": "Counts data reads (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1231 "EventCode": "0xB7",
1232 "MSRValue": "0x0000013091",
1233 "Counter": "0,1,2,3",
1234 "UMask": "0x1",
1235 "PEBScounters": "0,1,2,3",
1236 "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE",
1237 "PDIR_COUNTER": "na",
1238 "MSRIndex": "0x1a6, 0x1a7",
1239 "SampleAfterValue": "100007",
1240 "BriefDescription": "Counts data reads (demand & prefetch) have any transaction responses from the uncore subsystem.",
1241 "Offcore": "1"
1242 },
1243 {
1244 "CollectPEBSRecord": "1",
1245 "PublicDescription": "Counts data reads (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1246 "EventCode": "0xB7",
1247 "MSRValue": "0x0000043091",
1248 "Counter": "0,1,2,3",
1249 "UMask": "0x1",
1250 "PEBScounters": "0,1,2,3",
1251 "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT",
1252 "PDIR_COUNTER": "na",
1253 "MSRIndex": "0x1a6, 0x1a7",
1254 "SampleAfterValue": "100007",
1255 "BriefDescription": "Counts data reads (demand & prefetch) hit the L2 cache.",
1256 "Offcore": "1"
1257 },
1258 {
1259 "CollectPEBSRecord": "1",
1260 "PublicDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1261 "EventCode": "0xB7",
1262 "MSRValue": "0x0200003091",
1263 "Counter": "0,1,2,3",
1264 "UMask": "0x1",
1265 "PEBScounters": "0,1,2,3",
1266 "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
1267 "PDIR_COUNTER": "na",
1268 "MSRIndex": "0x1a6, 0x1a7",
1269 "SampleAfterValue": "100007",
1270 "BriefDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.",
1271 "Offcore": "1"
1272 },
1273 {
1274 "CollectPEBSRecord": "1",
1275 "PublicDescription": "Counts data reads (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1276 "EventCode": "0xB7",
1277 "MSRValue": "0x1000003091",
1278 "Counter": "0,1,2,3",
1279 "UMask": "0x1",
1280 "PEBScounters": "0,1,2,3",
1281 "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE",
1282 "PDIR_COUNTER": "na",
1283 "MSRIndex": "0x1a6, 0x1a7",
1284 "SampleAfterValue": "100007",
1285 "BriefDescription": "Counts data reads (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
1286 "Offcore": "1"
1287 },
1288 {
1289 "CollectPEBSRecord": "1",
1290 "PublicDescription": "Counts data reads (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1291 "EventCode": "0xB7",
1292 "MSRValue": "0x4000003091",
1293 "Counter": "0,1,2,3",
1294 "UMask": "0x1",
1295 "PEBScounters": "0,1,2,3",
1296 "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING",
1297 "PDIR_COUNTER": "na",
1298 "MSRIndex": "0x1a6",
1299 "SampleAfterValue": "100007",
1300 "BriefDescription": "Counts data reads (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.",
1301 "Offcore": "1"
1302 },
1303 {
1304 "CollectPEBSRecord": "1",
1305 "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1306 "EventCode": "0xB7",
1307 "MSRValue": "0x0000010022",
1308 "Counter": "0,1,2,3",
1309 "UMask": "0x1",
1310 "PEBScounters": "0,1,2,3",
1311 "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE",
1312 "PDIR_COUNTER": "na",
1313 "MSRIndex": "0x1a6, 0x1a7",
1314 "SampleAfterValue": "100007",
1315 "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem.",
1316 "Offcore": "1"
1317 },
1318 {
1319 "CollectPEBSRecord": "1",
1320 "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1321 "EventCode": "0xB7",
1322 "MSRValue": "0x0000040022",
1323 "Counter": "0,1,2,3",
1324 "UMask": "0x1",
1325 "PEBScounters": "0,1,2,3",
1326 "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT",
1327 "PDIR_COUNTER": "na",
1328 "MSRIndex": "0x1a6, 0x1a7",
1329 "SampleAfterValue": "100007",
1330 "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) hit the L2 cache.",
1331 "Offcore": "1"
1332 },
1333 {
1334 "CollectPEBSRecord": "1",
1335 "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1336 "EventCode": "0xB7",
1337 "MSRValue": "0x0200000022",
1338 "Counter": "0,1,2,3",
1339 "UMask": "0x1",
1340 "PEBScounters": "0,1,2,3",
1341 "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
1342 "PDIR_COUNTER": "na",
1343 "MSRIndex": "0x1a6, 0x1a7",
1344 "SampleAfterValue": "100007",
1345 "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.",
1346 "Offcore": "1"
1347 },
1348 {
1349 "CollectPEBSRecord": "1",
1350 "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1351 "EventCode": "0xB7",
1352 "MSRValue": "0x1000000022",
1353 "Counter": "0,1,2,3",
1354 "UMask": "0x1",
1355 "PEBScounters": "0,1,2,3",
1356 "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE",
1357 "PDIR_COUNTER": "na",
1358 "MSRIndex": "0x1a6, 0x1a7",
1359 "SampleAfterValue": "100007",
1360 "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
1361 "Offcore": "1"
1362 },
1363 {
1364 "CollectPEBSRecord": "1",
1365 "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1366 "EventCode": "0xB7",
1367 "MSRValue": "0x4000000022",
1368 "Counter": "0,1,2,3",
1369 "UMask": "0x1",
1370 "PEBScounters": "0,1,2,3",
1371 "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING",
1372 "PDIR_COUNTER": "na",
1373 "MSRIndex": "0x1a6",
1374 "SampleAfterValue": "100007",
1375 "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.",
1376 "Offcore": "1"
1377 },
1378 {
1379 "CollectPEBSRecord": "1",
1380 "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1381 "EventCode": "0xB7",
1382 "MSRValue": "0x00000132b7",
1383 "Counter": "0,1,2,3",
1384 "UMask": "0x1",
1385 "PEBScounters": "0,1,2,3",
1386 "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE",
1387 "PDIR_COUNTER": "na",
1388 "MSRIndex": "0x1a6, 0x1a7",
1389 "SampleAfterValue": "100007",
1390 "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem.",
1391 "Offcore": "1"
1392 },
1393 {
1394 "CollectPEBSRecord": "1",
1395 "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1396 "EventCode": "0xB7",
1397 "MSRValue": "0x00000432b7",
1398 "Counter": "0,1,2,3",
1399 "UMask": "0x1",
1400 "PEBScounters": "0,1,2,3",
1401 "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT",
1402 "PDIR_COUNTER": "na",
1403 "MSRIndex": "0x1a6, 0x1a7",
1404 "SampleAfterValue": "100007",
1405 "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) hit the L2 cache.",
1406 "Offcore": "1"
1407 },
1408 {
1409 "CollectPEBSRecord": "1",
1410 "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1411 "EventCode": "0xB7",
1412 "MSRValue": "0x02000032b7",
1413 "Counter": "0,1,2,3",
1414 "UMask": "0x1",
1415 "PEBScounters": "0,1,2,3",
1416 "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
1417 "PDIR_COUNTER": "na",
1418 "MSRIndex": "0x1a6, 0x1a7",
1419 "SampleAfterValue": "100007",
1420 "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.",
1421 "Offcore": "1"
1422 },
1423 {
1424 "CollectPEBSRecord": "1",
1425 "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1426 "EventCode": "0xB7",
1427 "MSRValue": "0x10000032b7",
1428 "Counter": "0,1,2,3",
1429 "UMask": "0x1",
1430 "PEBScounters": "0,1,2,3",
1431 "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HITM_OTHER_CORE",
1432 "PDIR_COUNTER": "na",
1433 "MSRIndex": "0x1a6, 0x1a7",
1434 "SampleAfterValue": "100007",
1435 "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
1436 "Offcore": "1"
1437 },
1438 {
1439 "CollectPEBSRecord": "1",
1440 "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
1441 "EventCode": "0xB7",
1442 "MSRValue": "0x40000032b7",
1443 "Counter": "0,1,2,3",
1444 "UMask": "0x1",
1445 "PEBScounters": "0,1,2,3",
1446 "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING",
1447 "PDIR_COUNTER": "na",
1448 "MSRIndex": "0x1a6",
1449 "SampleAfterValue": "100007",
1450 "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.",
1451 "Offcore": "1"
1452 }
1453] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json b/tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json
new file mode 100644
index 000000000000..a7878965ceab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json
@@ -0,0 +1,62 @@
1[
2 {
3 "CollectPEBSRecord": "1",
4 "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is in the ICache (hit). The event strives to count on a cache line basis, so that multiple accesses which hit in a single cache line count as one ICACHE.HIT. Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
5 "EventCode": "0x80",
6 "Counter": "0,1,2,3",
7 "UMask": "0x1",
8 "PEBScounters": "0,1,2,3",
9 "EventName": "ICACHE.HIT",
10 "PDIR_COUNTER": "na",
11 "SampleAfterValue": "200003",
12 "BriefDescription": "References per ICache line that are available in the ICache (hit). This event counts differently than Intel processors based on Silvermont microarchitecture"
13 },
14 {
15 "CollectPEBSRecord": "1",
16 "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is not in the ICache (miss). The event strives to count on a cache line basis, so that multiple accesses which miss in a single cache line count as one ICACHE.MISS. Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is not in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
17 "EventCode": "0x80",
18 "Counter": "0,1,2,3",
19 "UMask": "0x2",
20 "PEBScounters": "0,1,2,3",
21 "EventName": "ICACHE.MISSES",
22 "PDIR_COUNTER": "na",
23 "SampleAfterValue": "200003",
24 "BriefDescription": "References per ICache line that are not available in the ICache (miss). This event counts differently than Intel processors based on Silvermont microarchitecture"
25 },
26 {
27 "CollectPEBSRecord": "1",
28 "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line. The event strives to count on a cache line basis, so that multiple fetches to a single cache line count as one ICACHE.ACCESS. Specifically, the event counts when accesses from straight line code crosses the cache line boundary, or when a branch target is to a new line.\r\nThis event counts differently than Intel processors based on Silvermont microarchitecture.",
29 "EventCode": "0x80",
30 "Counter": "0,1,2,3",
31 "UMask": "0x3",
32 "PEBScounters": "0,1,2,3",
33 "EventName": "ICACHE.ACCESSES",
34 "PDIR_COUNTER": "na",
35 "SampleAfterValue": "200003",
36 "BriefDescription": "References per ICache line. This event counts differently than Intel processors based on Silvermont microarchitecture"
37 },
38 {
39 "CollectPEBSRecord": "1",
40 "PublicDescription": "Counts the number of times the Microcode Sequencer (MS) starts a flow of uops from the MSROM. It does not count every time a uop is read from the MSROM. The most common case that this counts is when a micro-coded instruction is encountered by the front end of the machine. Other cases include when an instruction encounters a fault, trap, or microcode assist of any sort that initiates a flow of uops. The event will count MS startups for uops that are speculative, and subsequently cleared by branch mispredict or a machine clear.",
41 "EventCode": "0xE7",
42 "Counter": "0,1,2,3",
43 "UMask": "0x1",
44 "PEBScounters": "0,1,2,3",
45 "EventName": "MS_DECODED.MS_ENTRY",
46 "PDIR_COUNTER": "na",
47 "SampleAfterValue": "200003",
48 "BriefDescription": "MS decode starts"
49 },
50 {
51 "CollectPEBSRecord": "1",
52 "PublicDescription": "Counts the number of times the prediction (from the predecode cache) for instruction length is incorrect.",
53 "EventCode": "0xE9",
54 "Counter": "0,1,2,3",
55 "UMask": "0x1",
56 "PEBScounters": "0,1,2,3",
57 "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
58 "PDIR_COUNTER": "na",
59 "SampleAfterValue": "200003",
60 "BriefDescription": "Decode restrictions due to predicting wrong instruction length"
61 }
62] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/memory.json b/tools/perf/pmu-events/arch/x86/goldmontplus/memory.json
new file mode 100644
index 000000000000..91e0815f3ffb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/memory.json
@@ -0,0 +1,38 @@
1[
2 {
3 "PEBS": "2",
4 "CollectPEBSRecord": "2",
5 "PublicDescription": "Counts when a memory load of a uop spans a page boundary (a split) is retired.",
6 "EventCode": "0x13",
7 "Counter": "0,1,2,3",
8 "UMask": "0x2",
9 "PEBScounters": "0,1,2,3",
10 "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT",
11 "SampleAfterValue": "200003",
12 "BriefDescription": "Load uops that split a page (Precise event capable)"
13 },
14 {
15 "PEBS": "2",
16 "CollectPEBSRecord": "2",
17 "PublicDescription": "Counts when a memory store of a uop spans a page boundary (a split) is retired.",
18 "EventCode": "0x13",
19 "Counter": "0,1,2,3",
20 "UMask": "0x4",
21 "PEBScounters": "0,1,2,3",
22 "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT",
23 "SampleAfterValue": "200003",
24 "BriefDescription": "Store uops that split a page (Precise event capable)"
25 },
26 {
27 "CollectPEBSRecord": "1",
28 "PublicDescription": "Counts machine clears due to memory ordering issues. This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved - as another core is in the process of modifying the data.",
29 "EventCode": "0xC3",
30 "Counter": "0,1,2,3",
31 "UMask": "0x2",
32 "PEBScounters": "0,1,2,3",
33 "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
34 "PDIR_COUNTER": "na",
35 "SampleAfterValue": "20003",
36 "BriefDescription": "Machine clears due to memory ordering issue"
37 }
38] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/other.json b/tools/perf/pmu-events/arch/x86/goldmontplus/other.json
new file mode 100644
index 000000000000..b860374418ab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/other.json
@@ -0,0 +1,98 @@
1[
2 {
3 "CollectPEBSRecord": "1",
4 "PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.",
5 "EventCode": "0x86",
6 "Counter": "0,1,2,3",
7 "UMask": "0x0",
8 "PEBScounters": "0,1,2,3",
9 "EventName": "FETCH_STALL.ALL",
10 "PDIR_COUNTER": "na",
11 "SampleAfterValue": "200003",
12 "BriefDescription": "Cycles code-fetch stalled due to any reason."
13 },
14 {
15 "CollectPEBSRecord": "1",
16 "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss. Note: this event is not the same as page walk cycles to retrieve an instruction translation.",
17 "EventCode": "0x86",
18 "Counter": "0,1,2,3",
19 "UMask": "0x1",
20 "PEBScounters": "0,1,2,3",
21 "EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES",
22 "PDIR_COUNTER": "na",
23 "SampleAfterValue": "200003",
24 "BriefDescription": "Cycles the code-fetch stalls and an ITLB miss is outstanding."
25 },
26 {
27 "CollectPEBSRecord": "1",
28 "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend due to either a full resource in the backend (RESOURCE_FULL) or due to the processor recovering from some event (RECOVERY).",
29 "EventCode": "0xCA",
30 "Counter": "0,1,2,3",
31 "UMask": "0x0",
32 "PEBScounters": "0,1,2,3",
33 "EventName": "ISSUE_SLOTS_NOT_CONSUMED.ANY",
34 "PDIR_COUNTER": "na",
35 "SampleAfterValue": "200003",
36 "BriefDescription": "Unfilled issue slots per cycle"
37 },
38 {
39 "CollectPEBSRecord": "1",
40 "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend. Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable. Note that uops must be available for consumption in order for this event to fire. If a uop is not available (Instruction Queue is empty), this event will not count.",
41 "EventCode": "0xCA",
42 "Counter": "0,1,2,3",
43 "UMask": "0x1",
44 "PEBScounters": "0,1,2,3",
45 "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL",
46 "PDIR_COUNTER": "na",
47 "SampleAfterValue": "200003",
48 "BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend"
49 },
50 {
51 "CollectPEBSRecord": "1",
52 "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows). Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.",
53 "EventCode": "0xCA",
54 "Counter": "0,1,2,3",
55 "UMask": "0x2",
56 "PEBScounters": "0,1,2,3",
57 "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY",
58 "PDIR_COUNTER": "na",
59 "SampleAfterValue": "200003",
60 "BriefDescription": "Unfilled issue slots per cycle to recover"
61 },
62 {
63 "CollectPEBSRecord": "2",
64 "PublicDescription": "Counts hardware interrupts received by the processor.",
65 "EventCode": "0xCB",
66 "Counter": "0,1,2,3",
67 "UMask": "0x1",
68 "PEBScounters": "0,1,2,3",
69 "EventName": "HW_INTERRUPTS.RECEIVED",
70 "PDIR_COUNTER": "na",
71 "SampleAfterValue": "203",
72 "BriefDescription": "Hardware interrupts received"
73 },
74 {
75 "CollectPEBSRecord": "2",
76 "PublicDescription": "Counts the number of core cycles during which interrupts are masked (disabled). Increments by 1 each core cycle that EFLAGS.IF is 0, regardless of whether interrupts are pending or not.",
77 "EventCode": "0xCB",
78 "Counter": "0,1,2,3",
79 "UMask": "0x2",
80 "PEBScounters": "0,1,2,3",
81 "EventName": "HW_INTERRUPTS.MASKED",
82 "PDIR_COUNTER": "na",
83 "SampleAfterValue": "200003",
84 "BriefDescription": "Cycles hardware interrupts are masked"
85 },
86 {
87 "CollectPEBSRecord": "2",
88 "PublicDescription": "Counts core cycles during which there are pending interrupts, but interrupts are masked (EFLAGS.IF = 0).",
89 "EventCode": "0xCB",
90 "Counter": "0,1,2,3",
91 "UMask": "0x4",
92 "PEBScounters": "0,1,2,3",
93 "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
94 "PDIR_COUNTER": "na",
95 "SampleAfterValue": "200003",
96 "BriefDescription": "Cycles pending interrupts are masked"
97 }
98] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json b/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json
new file mode 100644
index 000000000000..ccf1aed69197
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json
@@ -0,0 +1,544 @@
1[
2 {
3 "PEBS": "2",
4 "CollectPEBSRecord": "1",
5 "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBS record for this event.",
6 "EventCode": "0x00",
7 "Counter": "Fixed counter 0",
8 "UMask": "0x1",
9 "PEBScounters": "32",
10 "EventName": "INST_RETIRED.ANY",
11 "PDIR_COUNTER": "na",
12 "SampleAfterValue": "2000003",
13 "BriefDescription": "Instructions retired (Fixed event)"
14 },
15 {
16 "CollectPEBSRecord": "1",
17 "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. You cannot collect a PEBS record for this event.",
18 "EventCode": "0x00",
19 "Counter": "Fixed counter 1",
20 "UMask": "0x2",
21 "PEBScounters": "33",
22 "EventName": "CPU_CLK_UNHALTED.CORE",
23 "PDIR_COUNTER": "na",
24 "SampleAfterValue": "2000003",
25 "BriefDescription": "Core cycles when core is not halted (Fixed event)"
26 },
27 {
28 "CollectPEBSRecord": "1",
29 "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBS record for this event.",
30 "EventCode": "0x00",
31 "Counter": "Fixed counter 2",
32 "UMask": "0x3",
33 "PEBScounters": "34",
34 "EventName": "CPU_CLK_UNHALTED.REF_TSC",
35 "PDIR_COUNTER": "na",
36 "SampleAfterValue": "2000003",
37 "BriefDescription": "Reference cycles when core is not halted (Fixed event)"
38 },
39 {
40 "PEBS": "2",
41 "CollectPEBSRecord": "2",
42 "PublicDescription": "Counts a load blocked from using a store forward, where the forward did not occur because the store data was not available at the right time. The forward might occur subsequently when the data is available.",
43 "EventCode": "0x03",
44 "Counter": "0,1,2,3",
45 "UMask": "0x1",
46 "PEBScounters": "0,1,2,3",
47 "EventName": "LD_BLOCKS.DATA_UNKNOWN",
48 "SampleAfterValue": "200003",
49 "BriefDescription": "Loads blocked due to store data not ready (Precise event capable)"
50 },
51 {
52 "PEBS": "2",
53 "CollectPEBSRecord": "2",
54 "PublicDescription": "Counts a load blocked from using a store forward because of an address/size mismatch, only one of the loads blocked from each store will be counted.",
55 "EventCode": "0x03",
56 "Counter": "0,1,2,3",
57 "UMask": "0x2",
58 "PEBScounters": "0,1,2,3",
59 "EventName": "LD_BLOCKS.STORE_FORWARD",
60 "SampleAfterValue": "200003",
61 "BriefDescription": "Loads blocked due to store forward restriction (Precise event capable)"
62 },
63 {
64 "PEBS": "2",
65 "CollectPEBSRecord": "2",
66 "PublicDescription": "Counts loads that block because their address modulo 4K matches a pending store.",
67 "EventCode": "0x03",
68 "Counter": "0,1,2,3",
69 "UMask": "0x4",
70 "PEBScounters": "0,1,2,3",
71 "EventName": "LD_BLOCKS.4K_ALIAS",
72 "SampleAfterValue": "200003",
73 "BriefDescription": "Loads blocked because address has 4k partial address false dependence (Precise event capable)"
74 },
75 {
76 "PEBS": "2",
77 "CollectPEBSRecord": "2",
78 "PublicDescription": "Counts loads blocked because they are unable to find their physical address in the micro TLB (UTLB).",
79 "EventCode": "0x03",
80 "Counter": "0,1,2,3",
81 "UMask": "0x8",
82 "PEBScounters": "0,1,2,3",
83 "EventName": "LD_BLOCKS.UTLB_MISS",
84 "SampleAfterValue": "200003",
85 "BriefDescription": "Loads blocked because address is not in the UTLB (Precise event capable)"
86 },
87 {
88 "PEBS": "2",
89 "CollectPEBSRecord": "2",
90 "PublicDescription": "Counts anytime a load that retires is blocked for any reason.",
91 "EventCode": "0x03",
92 "Counter": "0,1,2,3",
93 "UMask": "0x10",
94 "PEBScounters": "0,1,2,3",
95 "EventName": "LD_BLOCKS.ALL_BLOCK",
96 "SampleAfterValue": "200003",
97 "BriefDescription": "Loads blocked (Precise event capable)"
98 },
99 {
100 "CollectPEBSRecord": "1",
101 "PublicDescription": "Counts uops issued by the front end and allocated into the back end of the machine. This event counts uops that retire as well as uops that were speculatively executed but didn't retire. The sort of speculative uops that might be counted includes, but is not limited to those uops issued in the shadow of a miss-predicted branch, those uops that are inserted during an assist (such as for a denormal floating point result), and (previously allocated) uops that might be canceled during a machine clear.",
102 "EventCode": "0x0E",
103 "Counter": "0,1,2,3",
104 "UMask": "0x0",
105 "PEBScounters": "0,1,2,3",
106 "EventName": "UOPS_ISSUED.ANY",
107 "PDIR_COUNTER": "na",
108 "SampleAfterValue": "200003",
109 "BriefDescription": "Uops issued to the back end per cycle"
110 },
111 {
112 "CollectPEBSRecord": "1",
113 "PublicDescription": "Core cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.",
114 "EventCode": "0x3C",
115 "Counter": "0,1,2,3",
116 "UMask": "0x0",
117 "PEBScounters": "0,1,2,3",
118 "EventName": "CPU_CLK_UNHALTED.CORE_P",
119 "PDIR_COUNTER": "na",
120 "SampleAfterValue": "2000003",
121 "BriefDescription": "Core cycles when core is not halted"
122 },
123 {
124 "CollectPEBSRecord": "1",
125 "PublicDescription": "Reference cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.",
126 "EventCode": "0x3C",
127 "Counter": "0,1,2,3",
128 "UMask": "0x1",
129 "PEBScounters": "0,1,2,3",
130 "EventName": "CPU_CLK_UNHALTED.REF",
131 "PDIR_COUNTER": "na",
132 "SampleAfterValue": "2000003",
133 "BriefDescription": "Reference cycles when core is not halted"
134 },
135 {
136 "CollectPEBSRecord": "1",
137 "PublicDescription": "This event is used to measure front-end inefficiencies. I.e. when front-end of the machine is not delivering uops to the back-end and the back-end is not stalled. This event can be used to identify if the machine is truly front-end bound. When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into uops in machine understandable format and putting them into a uop queue to be consumed by back end. The back-end then takes these uops, allocates the required resources. When all resources are ready, uops are executed. If the back-end is not ready to accept uops from the front-end, then we do not want to count these as front-end bottlenecks. However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more uops. This event counts only when back-end is requesting more uops and front-end is not able to provide them. When 3 uops are requested and no uops are delivered, the event counts 3. When 3 are requested, and only 1 is delivered, the event counts 2. When only 2 are delivered, the event counts 1. Alternatively stated, the event will not count if 3 uops are delivered, or if the back end is stalled and not requesting any uops at all. Counts indicate missed opportunities for the front-end to deliver a uop to the back end. Some examples of conditions that cause front-end inefficiencies are: ICache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth. Known Issues: Some uops require multiple allocation slots. These uops will not be charged as a front end 'not delivered' opportunity, and will be regarded as a back end problem. 
For example, the INC instruction has one uop that requires 2 issue slots. A stream of INC instructions will not count as UOPS_NOT_DELIVERED, even though only one instruction can be issued per clock. The low uop issue rate for a stream of INC instructions is considered to be a back end issue.",
138 "EventCode": "0x9C",
139 "Counter": "0,1,2,3",
140 "UMask": "0x0",
141 "PEBScounters": "0,1,2,3",
142 "EventName": "UOPS_NOT_DELIVERED.ANY",
143 "PDIR_COUNTER": "na",
144 "SampleAfterValue": "200003",
145 "BriefDescription": "Uops requested but not-delivered to the back-end per cycle"
146 },
147 {
148 "PEBS": "2",
149 "CollectPEBSRecord": "1",
150 "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers. This is an architectural performance event. This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable: The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event. Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.",
151 "EventCode": "0xC0",
152 "Counter": "0,1,2,3",
153 "UMask": "0x0",
154 "PEBScounters": "0,1,2,3",
155 "EventName": "INST_RETIRED.ANY_P",
156 "SampleAfterValue": "2000003",
157 "BriefDescription": "Instructions retired (Precise event capable)"
158 },
159 {
160 "PEBS": "2",
161 "CollectPEBSRecord": "2",
162 "PublicDescription": "Counts INST_RETIRED.ANY using the Reduced Skid PEBS feature that reduces the shadow in which events aren't counted allowing for a more unbiased distribution of samples across instructions retired.",
163 "EventCode": "0xC0",
164 "Counter": "0,1,2,3",
165 "UMask": "0x0",
166 "EventName": "INST_RETIRED.PREC_DIST",
167 "SampleAfterValue": "2000003",
168 "BriefDescription": "Instructions retired - using Reduced Skid PEBS feature"
169 },
170 {
171 "PEBS": "2",
172 "CollectPEBSRecord": "2",
173 "PublicDescription": "Counts uops which retired.",
174 "EventCode": "0xC2",
175 "Counter": "0,1,2,3",
176 "UMask": "0x0",
177 "PEBScounters": "0,1,2,3",
178 "EventName": "UOPS_RETIRED.ANY",
179 "PDIR_COUNTER": "na",
180 "SampleAfterValue": "2000003",
181 "BriefDescription": "Uops retired (Precise event capable)"
182 },
183 {
184 "PEBS": "2",
185 "CollectPEBSRecord": "2",
186 "PublicDescription": "Counts uops retired that are from the complex flows issued by the micro-sequencer (MS). Counts both the uops from a micro-coded instruction, and the uops that might be generated from a micro-coded assist.",
187 "EventCode": "0xC2",
188 "Counter": "0,1,2,3",
189 "UMask": "0x1",
190 "PEBScounters": "0,1,2,3",
191 "EventName": "UOPS_RETIRED.MS",
192 "PDIR_COUNTER": "na",
193 "SampleAfterValue": "2000003",
194 "BriefDescription": "MS uops retired (Precise event capable)"
195 },
196 {
197 "PEBS": "2",
198 "CollectPEBSRecord": "1",
199 "PublicDescription": "Counts the number of floating point divide uops retired.",
200 "EventCode": "0xC2",
201 "Counter": "0,1,2,3",
202 "UMask": "0x8",
203 "PEBScounters": "0,1,2,3",
204 "EventName": "UOPS_RETIRED.FPDIV",
205 "SampleAfterValue": "2000003",
206 "BriefDescription": "Floating point divide uops retired (Precise Event Capable)"
207 },
208 {
209 "PEBS": "2",
210 "CollectPEBSRecord": "1",
211 "PublicDescription": "Counts the number of integer divide uops retired.",
212 "EventCode": "0xC2",
213 "Counter": "0,1,2,3",
214 "UMask": "0x10",
215 "PEBScounters": "0,1,2,3",
216 "EventName": "UOPS_RETIRED.IDIV",
217 "SampleAfterValue": "2000003",
218 "BriefDescription": "Integer divide uops retired (Precise Event Capable)"
219 },
220 {
221 "CollectPEBSRecord": "1",
222 "PublicDescription": "Counts machine clears for any reason.",
223 "EventCode": "0xC3",
224 "Counter": "0,1,2,3",
225 "UMask": "0x0",
226 "PEBScounters": "0,1,2,3",
227 "EventName": "MACHINE_CLEARS.ALL",
228 "PDIR_COUNTER": "na",
229 "SampleAfterValue": "20003",
230 "BriefDescription": "All machine clears"
231 },
232 {
233 "CollectPEBSRecord": "1",
234 "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.",
235 "EventCode": "0xC3",
236 "Counter": "0,1,2,3",
237 "UMask": "0x1",
238 "PEBScounters": "0,1,2,3",
239 "EventName": "MACHINE_CLEARS.SMC",
240 "PDIR_COUNTER": "na",
241 "SampleAfterValue": "20003",
242 "BriefDescription": "Self-Modifying Code detected"
243 },
244 {
245 "CollectPEBSRecord": "1",
246 "PublicDescription": "Counts machine clears due to floating point (FP) operations needing assists. For instance, if the result was a floating point denormal, the hardware clears the pipeline and reissues uops to produce the correct IEEE compliant denormal result.",
247 "EventCode": "0xC3",
248 "Counter": "0,1,2,3",
249 "UMask": "0x4",
250 "PEBScounters": "0,1,2,3",
251 "EventName": "MACHINE_CLEARS.FP_ASSIST",
252 "PDIR_COUNTER": "na",
253 "SampleAfterValue": "20003",
254 "BriefDescription": "Machine clears due to FP assists"
255 },
256 {
257 "CollectPEBSRecord": "1",
258 "PublicDescription": "Counts machine clears due to memory disambiguation. Memory disambiguation happens when a load which has been issued conflicts with a previous unretired store in the pipeline whose address was not known at issue time, but is later resolved to be the same as the load address.",
259 "EventCode": "0xC3",
260 "Counter": "0,1,2,3",
261 "UMask": "0x8",
262 "PEBScounters": "0,1,2,3",
263 "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
264 "PDIR_COUNTER": "na",
265 "SampleAfterValue": "20003",
266 "BriefDescription": "Machine clears due to memory disambiguation"
267 },
268 {
269 "CollectPEBSRecord": "1",
270 "PublicDescription": "Counts the number of times that the machine clears due to a page fault. Covers both I-side and D-side(Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.",
271 "EventCode": "0xC3",
272 "Counter": "0,1,2,3",
273 "UMask": "0x20",
274 "PEBScounters": "0,1,2,3",
275 "EventName": "MACHINE_CLEARS.PAGE_FAULT",
276 "PDIR_COUNTER": "na",
277 "SampleAfterValue": "20003",
278 "BriefDescription": "Machine clears due to a page fault"
279 },
280 {
281 "PEBS": "2",
282 "CollectPEBSRecord": "2",
283 "PublicDescription": "Counts branch instructions retired for all branch types. This is an architectural performance event.",
284 "EventCode": "0xC4",
285 "Counter": "0,1,2,3",
286 "UMask": "0x0",
287 "PEBScounters": "0,1,2,3",
288 "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
289 "SampleAfterValue": "200003",
290 "BriefDescription": "Retired branch instructions (Precise event capable)"
291 },
292 {
293 "PEBS": "2",
294 "CollectPEBSRecord": "2",
295 "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was taken and when it was not taken.",
296 "EventCode": "0xC4",
297 "Counter": "0,1,2,3",
298 "UMask": "0x7e",
299 "PEBScounters": "0,1,2,3",
300 "EventName": "BR_INST_RETIRED.JCC",
301 "SampleAfterValue": "200003",
302 "BriefDescription": "Retired conditional branch instructions (Precise event capable)"
303 },
304 {
305 "PEBS": "2",
306 "CollectPEBSRecord": "2",
307 "PublicDescription": "Counts the number of taken branch instructions retired.",
308 "EventCode": "0xC4",
309 "Counter": "0,1,2,3",
310 "UMask": "0x80",
311 "PEBScounters": "0,1,2,3",
312 "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES",
313 "SampleAfterValue": "200003",
314 "BriefDescription": "Retired taken branch instructions (Precise event capable)"
315 },
316 {
317 "PEBS": "2",
318 "CollectPEBSRecord": "2",
319 "PublicDescription": "Counts far branch instructions retired. This includes far jump, far call and return, and Interrupt call and return.",
320 "EventCode": "0xC4",
321 "Counter": "0,1,2,3",
322 "UMask": "0xbf",
323 "PEBScounters": "0,1,2,3",
324 "EventName": "BR_INST_RETIRED.FAR_BRANCH",
325 "SampleAfterValue": "200003",
326 "BriefDescription": "Retired far branch instructions (Precise event capable)"
327 },
328 {
329 "PEBS": "2",
330 "CollectPEBSRecord": "2",
331 "PublicDescription": "Counts near indirect call or near indirect jmp branch instructions retired.",
332 "EventCode": "0xC4",
333 "Counter": "0,1,2,3",
334 "UMask": "0xeb",
335 "PEBScounters": "0,1,2,3",
336 "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
337 "SampleAfterValue": "200003",
338 "BriefDescription": "Retired instructions of near indirect Jmp or call (Precise event capable)"
339 },
340 {
341 "PEBS": "2",
342 "CollectPEBSRecord": "2",
343 "PublicDescription": "Counts near return branch instructions retired.",
344 "EventCode": "0xC4",
345 "Counter": "0,1,2,3",
346 "UMask": "0xf7",
347 "PEBScounters": "0,1,2,3",
348 "EventName": "BR_INST_RETIRED.RETURN",
349 "SampleAfterValue": "200003",
350 "BriefDescription": "Retired near return instructions (Precise event capable)"
351 },
352 {
353 "PEBS": "2",
354 "CollectPEBSRecord": "2",
355 "PublicDescription": "Counts near CALL branch instructions retired.",
356 "EventCode": "0xC4",
357 "Counter": "0,1,2,3",
358 "UMask": "0xf9",
359 "PEBScounters": "0,1,2,3",
360 "EventName": "BR_INST_RETIRED.CALL",
361 "SampleAfterValue": "200003",
362 "BriefDescription": "Retired near call instructions (Precise event capable)"
363 },
364 {
365 "PEBS": "2",
366 "CollectPEBSRecord": "2",
367 "PublicDescription": "Counts near indirect CALL branch instructions retired.",
368 "EventCode": "0xC4",
369 "Counter": "0,1,2,3",
370 "UMask": "0xfb",
371 "PEBScounters": "0,1,2,3",
372 "EventName": "BR_INST_RETIRED.IND_CALL",
373 "SampleAfterValue": "200003",
374 "BriefDescription": "Retired near indirect call instructions (Precise event capable)"
375 },
376 {
377 "PEBS": "2",
378 "CollectPEBSRecord": "2",
379 "PublicDescription": "Counts near relative CALL branch instructions retired.",
380 "EventCode": "0xC4",
381 "Counter": "0,1,2,3",
382 "UMask": "0xfd",
383 "PEBScounters": "0,1,2,3",
384 "EventName": "BR_INST_RETIRED.REL_CALL",
385 "SampleAfterValue": "200003",
386 "BriefDescription": "Retired near relative call instructions (Precise event capable)"
387 },
388 {
389 "PEBS": "2",
390 "CollectPEBSRecord": "2",
391 "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were taken and does not count when the Jcc branch instruction were not taken.",
392 "EventCode": "0xC4",
393 "Counter": "0,1,2,3",
394 "UMask": "0xfe",
395 "PEBScounters": "0,1,2,3",
396 "EventName": "BR_INST_RETIRED.TAKEN_JCC",
397 "SampleAfterValue": "200003",
398 "BriefDescription": "Retired conditional branch instructions that were taken (Precise event capable)"
399 },
400 {
401 "PEBS": "2",
402 "CollectPEBSRecord": "2",
403 "PublicDescription": "Counts mispredicted branch instructions retired including all branch types.",
404 "EventCode": "0xC5",
405 "Counter": "0,1,2,3",
406 "UMask": "0x0",
407 "PEBScounters": "0,1,2,3",
408 "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
409 "SampleAfterValue": "200003",
410 "BriefDescription": "Retired mispredicted branch instructions (Precise event capable)"
411 },
412 {
413 "PEBS": "2",
414 "CollectPEBSRecord": "2",
415 "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was supposed to be taken and when it was not supposed to be taken (but the processor predicted the opposite condition).",
416 "EventCode": "0xC5",
417 "Counter": "0,1,2,3",
418 "UMask": "0x7e",
419 "PEBScounters": "0,1,2,3",
420 "EventName": "BR_MISP_RETIRED.JCC",
421 "SampleAfterValue": "200003",
422 "BriefDescription": "Retired mispredicted conditional branch instructions (Precise event capable)"
423 },
424 {
425 "PEBS": "2",
426 "CollectPEBSRecord": "2",
427 "PublicDescription": "Counts mispredicted branch instructions retired that were near indirect call or near indirect jmp, where the target address taken was not what the processor predicted.",
428 "EventCode": "0xC5",
429 "Counter": "0,1,2,3",
430 "UMask": "0xeb",
431 "PEBScounters": "0,1,2,3",
432 "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
433 "SampleAfterValue": "200003",
434 "BriefDescription": "Retired mispredicted instructions of near indirect Jmp or near indirect call (Precise event capable)"
435 },
436 {
437 "PEBS": "2",
438 "CollectPEBSRecord": "2",
439 "PublicDescription": "Counts mispredicted near RET branch instructions retired, where the return address taken was not what the processor predicted.",
440 "EventCode": "0xC5",
441 "Counter": "0,1,2,3",
442 "UMask": "0xf7",
443 "PEBScounters": "0,1,2,3",
444 "EventName": "BR_MISP_RETIRED.RETURN",
445 "SampleAfterValue": "200003",
446 "BriefDescription": "Retired mispredicted near return instructions (Precise event capable)"
447 },
448 {
449 "PEBS": "2",
450 "CollectPEBSRecord": "2",
451 "PublicDescription": "Counts mispredicted near indirect CALL branch instructions retired, where the target address taken was not what the processor predicted.",
452 "EventCode": "0xC5",
453 "Counter": "0,1,2,3",
454 "UMask": "0xfb",
455 "PEBScounters": "0,1,2,3",
456 "EventName": "BR_MISP_RETIRED.IND_CALL",
457 "SampleAfterValue": "200003",
458 "BriefDescription": "Retired mispredicted near indirect call instructions (Precise event capable)"
459 },
460 {
461 "PEBS": "2",
462 "CollectPEBSRecord": "2",
463 "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were supposed to be taken but the processor predicted that it would not be taken.",
464 "EventCode": "0xC5",
465 "Counter": "0,1,2,3",
466 "UMask": "0xfe",
467 "PEBScounters": "0,1,2,3",
468 "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
469 "SampleAfterValue": "200003",
470 "BriefDescription": "Retired mispredicted conditional branch instructions that were taken (Precise event capable)"
471 },
472 {
473 "CollectPEBSRecord": "1",
474 "PublicDescription": "Counts core cycles if either divide unit is busy.",
475 "EventCode": "0xCD",
476 "Counter": "0,1,2,3",
477 "UMask": "0x0",
478 "PEBScounters": "0,1,2,3",
479 "EventName": "CYCLES_DIV_BUSY.ALL",
480 "PDIR_COUNTER": "na",
481 "SampleAfterValue": "2000003",
482 "BriefDescription": "Cycles a divider is busy"
483 },
484 {
485 "CollectPEBSRecord": "1",
486 "PublicDescription": "Counts core cycles the integer divide unit is busy.",
487 "EventCode": "0xCD",
488 "Counter": "0,1,2,3",
489 "UMask": "0x1",
490 "PEBScounters": "0,1,2,3",
491 "EventName": "CYCLES_DIV_BUSY.IDIV",
492 "PDIR_COUNTER": "na",
493 "SampleAfterValue": "200003",
494 "BriefDescription": "Cycles the integer divide unit is busy"
495 },
496 {
497 "CollectPEBSRecord": "1",
498 "PublicDescription": "Counts core cycles the floating point divide unit is busy.",
499 "EventCode": "0xCD",
500 "Counter": "0,1,2,3",
501 "UMask": "0x2",
502 "PEBScounters": "0,1,2,3",
503 "EventName": "CYCLES_DIV_BUSY.FPDIV",
504 "PDIR_COUNTER": "na",
505 "SampleAfterValue": "200003",
506 "BriefDescription": "Cycles the FP divide unit is busy"
507 },
508 {
509 "CollectPEBSRecord": "1",
510 "PublicDescription": "Counts the number of times a BACLEAR is signaled for any reason, including, but not limited to indirect branch/call, Jcc (Jump on Conditional Code/Jump if Condition is Met) branch, unconditional branch/call, and returns.",
511 "EventCode": "0xE6",
512 "Counter": "0,1,2,3",
513 "UMask": "0x1",
514 "PEBScounters": "0,1,2,3",
515 "EventName": "BACLEARS.ALL",
516 "PDIR_COUNTER": "na",
517 "SampleAfterValue": "200003",
518 "BriefDescription": "BACLEARs asserted for any branch type"
519 },
520 {
521 "CollectPEBSRecord": "1",
522 "PublicDescription": "Counts BACLEARS on return instructions.",
523 "EventCode": "0xE6",
524 "Counter": "0,1,2,3",
525 "UMask": "0x8",
526 "PEBScounters": "0,1,2,3",
527 "EventName": "BACLEARS.RETURN",
528 "PDIR_COUNTER": "na",
529 "SampleAfterValue": "200003",
530 "BriefDescription": "BACLEARs asserted for return branch"
531 },
532 {
533 "CollectPEBSRecord": "1",
534 "PublicDescription": "Counts BACLEARS on Jcc (Jump on Conditional Code/Jump if Condition is Met) branches.",
535 "EventCode": "0xE6",
536 "Counter": "0,1,2,3",
537 "UMask": "0x10",
538 "PEBScounters": "0,1,2,3",
539 "EventName": "BACLEARS.COND",
540 "PDIR_COUNTER": "na",
541 "SampleAfterValue": "200003",
542 "BriefDescription": "BACLEARs asserted for conditional branch"
543 }
544] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json b/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json
new file mode 100644
index 000000000000..0b53a3b0dfb8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json
@@ -0,0 +1,218 @@
1[
2 {
3 "CollectPEBSRecord": "1",
4 "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 4K pages. The page walks can end with or without a page fault.",
5 "EventCode": "0x08",
6 "Counter": "0,1,2,3",
7 "UMask": "0x2",
8 "PEBScounters": "0,1,2,3",
9 "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
10 "PDIR_COUNTER": "na",
11 "SampleAfterValue": "200003",
12 "BriefDescription": "Page walk completed due to a demand load to a 4K page"
13 },
14 {
15 "CollectPEBSRecord": "1",
16 "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 2M or 4M pages. The page walks can end with or without a page fault.",
17 "EventCode": "0x08",
18 "Counter": "0,1,2,3",
19 "UMask": "0x4",
20 "PEBScounters": "0,1,2,3",
21 "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
22 "PDIR_COUNTER": "na",
23 "SampleAfterValue": "200003",
24 "BriefDescription": "Page walk completed due to a demand load to a 2M or 4M page"
25 },
26 {
27 "CollectPEBSRecord": "1",
28 "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 1GB pages. The page walks can end with or without a page fault.",
29 "EventCode": "0x08",
30 "Counter": "0,1,2,3",
31 "UMask": "0x8",
32 "PEBScounters": "0,1,2,3",
33 "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1GB",
34 "PDIR_COUNTER": "na",
35 "SampleAfterValue": "200003",
36 "BriefDescription": "Page walk completed due to a demand load to a 1GB page"
37 },
38 {
39 "CollectPEBSRecord": "1",
40 "PublicDescription": "Counts once per cycle for each page walk occurring due to a load (demand data loads or SW prefetches). Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.",
41 "EventCode": "0x08",
42 "Counter": "0,1,2,3",
43 "UMask": "0x10",
44 "PEBScounters": "0,1,2,3",
45 "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
46 "PDIR_COUNTER": "na",
47 "SampleAfterValue": "200003",
48 "BriefDescription": "Page walks outstanding due to a demand load every cycle."
49 },
50 {
51 "CollectPEBSRecord": "1",
52 "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
53 "EventCode": "0x49",
54 "Counter": "0,1,2,3",
55 "UMask": "0x2",
56 "PEBScounters": "0,1,2,3",
57 "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
58 "PDIR_COUNTER": "na",
59 "SampleAfterValue": "2000003",
60 "BriefDescription": "Page walk completed due to a demand data store to a 4K page"
61 },
62 {
63 "CollectPEBSRecord": "1",
64 "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M or 4M pages. The page walks can end with or without a page fault.",
65 "EventCode": "0x49",
66 "Counter": "0,1,2,3",
67 "UMask": "0x4",
68 "PEBScounters": "0,1,2,3",
69 "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
70 "PDIR_COUNTER": "na",
71 "SampleAfterValue": "2000003",
72 "BriefDescription": "Page walk completed due to a demand data store to a 2M or 4M page"
73 },
74 {
75 "CollectPEBSRecord": "1",
76 "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1GB pages. The page walks can end with or without a page fault.",
77 "EventCode": "0x49",
78 "Counter": "0,1,2,3",
79 "UMask": "0x8",
80 "PEBScounters": "0,1,2,3",
81 "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1GB",
82 "PDIR_COUNTER": "na",
83 "SampleAfterValue": "2000003",
84 "BriefDescription": "Page walk completed due to a demand data store to a 1GB page"
85 },
86 {
87 "CollectPEBSRecord": "1",
88 "PublicDescription": "Counts once per cycle for each page walk occurring due to a demand data store. Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.",
89 "EventCode": "0x49",
90 "Counter": "0,1,2,3",
91 "UMask": "0x10",
92 "PEBScounters": "0,1,2,3",
93 "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
94 "PDIR_COUNTER": "na",
95 "SampleAfterValue": "200003",
96 "BriefDescription": "Page walks outstanding due to a demand data store every cycle."
97 },
98 {
99 "CollectPEBSRecord": "1",
100 "PublicDescription": "Counts once per cycle for each page walk only while traversing the Extended Page Table (EPT), and does not count during the rest of the translation. The EPT is used for translating Guest-Physical Addresses to Physical Addresses for Virtual Machine Monitors (VMMs). Average cycles per walk can be calculated by dividing the count by number of walks.",
101 "EventCode": "0x4F",
102 "Counter": "0,1,2,3",
103 "UMask": "0x10",
104 "PEBScounters": "0,1,2,3",
105 "EventName": "EPT.WALK_PENDING",
106 "PDIR_COUNTER": "na",
107 "SampleAfterValue": "200003",
108 "BriefDescription": "Page walks outstanding due to walking the EPT every cycle"
109 },
110 {
111 "CollectPEBSRecord": "1",
112 "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation Lookaside Buffer (ITLB) for a linear address of an instruction fetch. It counts when new translation are filled into the ITLB. The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.",
113 "EventCode": "0x81",
114 "Counter": "0,1,2,3",
115 "UMask": "0x4",
116 "PEBScounters": "0,1,2,3",
117 "EventName": "ITLB.MISS",
118 "PDIR_COUNTER": "na",
119 "SampleAfterValue": "200003",
120 "BriefDescription": "ITLB misses"
121 },
122 {
123 "CollectPEBSRecord": "1",
124 "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
125 "EventCode": "0x85",
126 "Counter": "0,1,2,3",
127 "UMask": "0x2",
128 "PEBScounters": "0,1,2,3",
129 "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
130 "PDIR_COUNTER": "na",
131 "SampleAfterValue": "2000003",
132 "BriefDescription": "Page walk completed due to an instruction fetch in a 4K page"
133 },
134 {
135 "CollectPEBSRecord": "1",
136 "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 2M or 4M pages. The page walks can end with or without a page fault.",
137 "EventCode": "0x85",
138 "Counter": "0,1,2,3",
139 "UMask": "0x4",
140 "PEBScounters": "0,1,2,3",
141 "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
142 "PDIR_COUNTER": "na",
143 "SampleAfterValue": "2000003",
144 "BriefDescription": "Page walk completed due to an instruction fetch in a 2M or 4M page"
145 },
146 {
147 "CollectPEBSRecord": "1",
148 "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 1GB pages. The page walks can end with or without a page fault.",
149 "EventCode": "0x85",
150 "Counter": "0,1,2,3",
151 "UMask": "0x8",
152 "PEBScounters": "0,1,2,3",
153 "EventName": "ITLB_MISSES.WALK_COMPLETED_1GB",
154 "PDIR_COUNTER": "na",
155 "SampleAfterValue": "2000003",
156 "BriefDescription": "Page walk completed due to an instruction fetch in a 1GB page"
157 },
158 {
159 "CollectPEBSRecord": "1",
160 "PublicDescription": "Counts once per cycle for each page walk occurring due to an instruction fetch. Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.",
161 "EventCode": "0x85",
162 "Counter": "0,1,2,3",
163 "UMask": "0x10",
164 "PEBScounters": "0,1,2,3",
165 "EventName": "ITLB_MISSES.WALK_PENDING",
166 "PDIR_COUNTER": "na",
167 "SampleAfterValue": "200003",
168 "BriefDescription": "Page walks outstanding due to an instruction fetch every cycle."
169 },
170 {
171 "CollectPEBSRecord": "1",
172 "PublicDescription": "Counts STLB flushes. The TLBs are flushed on instructions like INVLPG and MOV to CR3.",
173 "EventCode": "0xBD",
174 "Counter": "0,1,2,3",
175 "UMask": "0x20",
176 "PEBScounters": "0,1,2,3",
177 "EventName": "TLB_FLUSHES.STLB_ANY",
178 "PDIR_COUNTER": "na",
179 "SampleAfterValue": "20003",
180 "BriefDescription": "STLB flushes"
181 },
182 {
183 "PEBS": "2",
184 "CollectPEBSRecord": "2",
185 "PublicDescription": "Counts load uops retired that caused a DTLB miss.",
186 "EventCode": "0xD0",
187 "Counter": "0,1,2,3",
188 "UMask": "0x11",
189 "PEBScounters": "0,1,2,3",
190 "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS",
191 "SampleAfterValue": "200003",
192 "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)"
193 },
194 {
195 "PEBS": "2",
196 "CollectPEBSRecord": "2",
197 "PublicDescription": "Counts store uops retired that caused a DTLB miss.",
198 "EventCode": "0xD0",
199 "Counter": "0,1,2,3",
200 "UMask": "0x12",
201 "PEBScounters": "0,1,2,3",
202 "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES",
203 "SampleAfterValue": "200003",
204 "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)"
205 },
206 {
207 "PEBS": "2",
208 "CollectPEBSRecord": "2",
209 "PublicDescription": "Counts uops retired that had a DTLB miss on load, store or either. Note that when two distinct memory operations to the same page miss the DTLB, only one of them will be recorded as a DTLB miss.",
210 "EventCode": "0xD0",
211 "Counter": "0,1,2,3",
212 "UMask": "0x13",
213 "PEBScounters": "0,1,2,3",
214 "EventName": "MEM_UOPS_RETIRED.DTLB_MISS",
215 "SampleAfterValue": "200003",
216 "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)"
217 }
218] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
new file mode 100644
index 000000000000..5ab5c78fe580
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -0,0 +1,158 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15        "BriefDescription": "Rough estimation of the fraction of fetched cache line bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23        "MetricGroup": "DSB;Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
64 "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)",
65 "MetricGroup": "Unknown_Branches",
66 "MetricName": "BAClear_Cost"
67 },
68 {
69 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
70 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
71 "MetricGroup": "SMT",
72 "MetricName": "CORE_CLKS"
73 },
74 {
75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
77 "MetricGroup": "Memory_Bound;Memory_Lat",
78 "MetricName": "Load_Miss_Real_Latency"
79 },
80 {
81 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
82 "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
83 "MetricGroup": "Memory_Bound;Memory_BW",
84 "MetricName": "MLP"
85 },
86 {
87 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
88 "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
89 "MetricGroup": "TLB",
90 "MetricName": "Page_Walks_Utilization"
91 },
92 {
93 "BriefDescription": "Average CPU Utilization",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
95 "MetricGroup": "Summary",
96 "MetricName": "CPU_Utilization"
97 },
98 {
99        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
100 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
101 "MetricGroup": "Power",
102 "MetricName": "Turbo_Utilization"
103 },
104 {
105 "BriefDescription": "Fraction of cycles where both hardware threads were active",
106 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
107 "MetricGroup": "SMT;Summary",
108 "MetricName": "SMT_2T_Utilization"
109 },
110 {
111 "BriefDescription": "Fraction of cycles spent in Kernel mode",
112 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
113 "MetricGroup": "Summary",
114 "MetricName": "Kernel_Utilization"
115 },
116 {
117 "BriefDescription": "C3 residency percent per core",
118 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
119 "MetricGroup": "Power",
120 "MetricName": "C3_Core_Residency"
121 },
122 {
123 "BriefDescription": "C6 residency percent per core",
124 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C6_Core_Residency"
127 },
128 {
129 "BriefDescription": "C7 residency percent per core",
130 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C7_Core_Residency"
133 },
134 {
135 "BriefDescription": "C2 residency percent per package",
136 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C2_Pkg_Residency"
139 },
140 {
141 "BriefDescription": "C3 residency percent per package",
142 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
143 "MetricGroup": "Power",
144 "MetricName": "C3_Pkg_Residency"
145 },
146 {
147 "BriefDescription": "C6 residency percent per package",
148 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
149 "MetricGroup": "Power",
150 "MetricName": "C6_Pkg_Residency"
151 },
152 {
153 "BriefDescription": "C7 residency percent per package",
154 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
155 "MetricGroup": "Power",
156 "MetricName": "C7_Pkg_Residency"
157 }
158]
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
new file mode 100644
index 000000000000..5ab5c78fe580
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -0,0 +1,158 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15        "BriefDescription": "Rough estimation of the fraction of fetched cache line bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23        "MetricGroup": "DSB;Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
64 "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)",
65 "MetricGroup": "Unknown_Branches",
66 "MetricName": "BAClear_Cost"
67 },
68 {
69 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
70 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
71 "MetricGroup": "SMT",
72 "MetricName": "CORE_CLKS"
73 },
74 {
75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
77 "MetricGroup": "Memory_Bound;Memory_Lat",
78 "MetricName": "Load_Miss_Real_Latency"
79 },
80 {
81 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
82 "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
83 "MetricGroup": "Memory_Bound;Memory_BW",
84 "MetricName": "MLP"
85 },
86 {
87 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
88 "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
89 "MetricGroup": "TLB",
90 "MetricName": "Page_Walks_Utilization"
91 },
92 {
93 "BriefDescription": "Average CPU Utilization",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
95 "MetricGroup": "Summary",
96 "MetricName": "CPU_Utilization"
97 },
98 {
99        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
100 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
101 "MetricGroup": "Power",
102 "MetricName": "Turbo_Utilization"
103 },
104 {
105 "BriefDescription": "Fraction of cycles where both hardware threads were active",
106 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
107 "MetricGroup": "SMT;Summary",
108 "MetricName": "SMT_2T_Utilization"
109 },
110 {
111 "BriefDescription": "Fraction of cycles spent in Kernel mode",
112 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
113 "MetricGroup": "Summary",
114 "MetricName": "Kernel_Utilization"
115 },
116 {
117 "BriefDescription": "C3 residency percent per core",
118 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
119 "MetricGroup": "Power",
120 "MetricName": "C3_Core_Residency"
121 },
122 {
123 "BriefDescription": "C6 residency percent per core",
124 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C6_Core_Residency"
127 },
128 {
129 "BriefDescription": "C7 residency percent per core",
130 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C7_Core_Residency"
133 },
134 {
135 "BriefDescription": "C2 residency percent per package",
136 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C2_Pkg_Residency"
139 },
140 {
141 "BriefDescription": "C3 residency percent per package",
142 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
143 "MetricGroup": "Power",
144 "MetricName": "C3_Pkg_Residency"
145 },
146 {
147 "BriefDescription": "C6 residency percent per package",
148 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
149 "MetricGroup": "Power",
150 "MetricName": "C6_Pkg_Residency"
151 },
152 {
153 "BriefDescription": "C7 residency percent per package",
154 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
155 "MetricGroup": "Power",
156 "MetricName": "C7_Pkg_Residency"
157 }
158]
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
new file mode 100644
index 000000000000..7c2679514efb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -0,0 +1,164 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15        "BriefDescription": "Rough estimation of the fraction of fetched cache line bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23        "MetricGroup": "DSB;Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
64 "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)",
65 "MetricGroup": "Unknown_Branches",
66 "MetricName": "BAClear_Cost"
67 },
68 {
69 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
70 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
71 "MetricGroup": "SMT",
72 "MetricName": "CORE_CLKS"
73 },
74 {
75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
77 "MetricGroup": "Memory_Bound;Memory_Lat",
78 "MetricName": "Load_Miss_Real_Latency"
79 },
80 {
81 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
82 "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
83 "MetricGroup": "Memory_Bound;Memory_BW",
84 "MetricName": "MLP"
85 },
86 {
87 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
88 "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
89 "MetricGroup": "TLB",
90 "MetricName": "Page_Walks_Utilization"
91 },
92 {
93 "BriefDescription": "Average CPU Utilization",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
95 "MetricGroup": "Summary",
96 "MetricName": "CPU_Utilization"
97 },
98 {
99 "BriefDescription": "Giga Floating Point Operations Per Second",
100 "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
101 "MetricGroup": "FLOPS;Summary",
102 "MetricName": "GFLOPs"
103 },
104 {
105        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
106 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
107 "MetricGroup": "Power",
108 "MetricName": "Turbo_Utilization"
109 },
110 {
111 "BriefDescription": "Fraction of cycles where both hardware threads were active",
112 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
113 "MetricGroup": "SMT;Summary",
114 "MetricName": "SMT_2T_Utilization"
115 },
116 {
117 "BriefDescription": "Fraction of cycles spent in Kernel mode",
118 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
119 "MetricGroup": "Summary",
120 "MetricName": "Kernel_Utilization"
121 },
122 {
123 "BriefDescription": "C3 residency percent per core",
124 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C3_Core_Residency"
127 },
128 {
129 "BriefDescription": "C6 residency percent per core",
130 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C6_Core_Residency"
133 },
134 {
135 "BriefDescription": "C7 residency percent per core",
136 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C7_Core_Residency"
139 },
140 {
141 "BriefDescription": "C2 residency percent per package",
142 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
143 "MetricGroup": "Power",
144 "MetricName": "C2_Pkg_Residency"
145 },
146 {
147 "BriefDescription": "C3 residency percent per package",
148 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
149 "MetricGroup": "Power",
150 "MetricName": "C3_Pkg_Residency"
151 },
152 {
153 "BriefDescription": "C6 residency percent per package",
154 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
155 "MetricGroup": "Power",
156 "MetricName": "C6_Pkg_Residency"
157 },
158 {
159 "BriefDescription": "C7 residency percent per package",
160 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
161 "MetricGroup": "Power",
162 "MetricName": "C7_Pkg_Residency"
163 }
164]
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
new file mode 100644
index 000000000000..7c2679514efb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -0,0 +1,164 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15        "BriefDescription": "Rough estimation of the fraction of fetched cache line bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23        "MetricGroup": "DSB;Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
64 "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)",
65 "MetricGroup": "Unknown_Branches",
66 "MetricName": "BAClear_Cost"
67 },
68 {
69 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
70 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
71 "MetricGroup": "SMT",
72 "MetricName": "CORE_CLKS"
73 },
74 {
75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
77 "MetricGroup": "Memory_Bound;Memory_Lat",
78 "MetricName": "Load_Miss_Real_Latency"
79 },
80 {
81 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
82 "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
83 "MetricGroup": "Memory_Bound;Memory_BW",
84 "MetricName": "MLP"
85 },
86 {
87 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
88 "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
89 "MetricGroup": "TLB",
90 "MetricName": "Page_Walks_Utilization"
91 },
92 {
93 "BriefDescription": "Average CPU Utilization",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
95 "MetricGroup": "Summary",
96 "MetricName": "CPU_Utilization"
97 },
98 {
99 "BriefDescription": "Giga Floating Point Operations Per Second",
100 "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
101 "MetricGroup": "FLOPS;Summary",
102 "MetricName": "GFLOPs"
103 },
104 {
105        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
106 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
107 "MetricGroup": "Power",
108 "MetricName": "Turbo_Utilization"
109 },
110 {
111 "BriefDescription": "Fraction of cycles where both hardware threads were active",
112 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
113 "MetricGroup": "SMT;Summary",
114 "MetricName": "SMT_2T_Utilization"
115 },
116 {
117 "BriefDescription": "Fraction of cycles spent in Kernel mode",
118 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
119 "MetricGroup": "Summary",
120 "MetricName": "Kernel_Utilization"
121 },
122 {
123 "BriefDescription": "C3 residency percent per core",
124 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C3_Core_Residency"
127 },
128 {
129 "BriefDescription": "C6 residency percent per core",
130 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C6_Core_Residency"
133 },
134 {
135 "BriefDescription": "C7 residency percent per core",
136 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C7_Core_Residency"
139 },
140 {
141 "BriefDescription": "C2 residency percent per package",
142 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
143 "MetricGroup": "Power",
144 "MetricName": "C2_Pkg_Residency"
145 },
146 {
147 "BriefDescription": "C3 residency percent per package",
148 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
149 "MetricGroup": "Power",
150 "MetricName": "C3_Pkg_Residency"
151 },
152 {
153 "BriefDescription": "C6 residency percent per package",
154 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
155 "MetricGroup": "Power",
156 "MetricName": "C6_Pkg_Residency"
157 },
158 {
159 "BriefDescription": "C7 residency percent per package",
160 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
161 "MetricGroup": "Power",
162 "MetricName": "C7_Pkg_Residency"
163 }
164]
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
new file mode 100644
index 000000000000..fd7d7c438226
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -0,0 +1,140 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15        "BriefDescription": "Rough estimation of the fraction of fetched cache line bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23        "MetricGroup": "DSB;Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
64 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
65 "MetricGroup": "SMT",
66 "MetricName": "CORE_CLKS"
67 },
68 {
69 "BriefDescription": "Average CPU Utilization",
70 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
71 "MetricGroup": "Summary",
72 "MetricName": "CPU_Utilization"
73 },
74 {
75 "BriefDescription": "Giga Floating Point Operations Per Second",
76 "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
77 "MetricGroup": "FLOPS;Summary",
78 "MetricName": "GFLOPs"
79 },
80 {
81        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
82 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
83 "MetricGroup": "Power",
84 "MetricName": "Turbo_Utilization"
85 },
86 {
87 "BriefDescription": "Fraction of cycles where both hardware threads were active",
88 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
89 "MetricGroup": "SMT;Summary",
90 "MetricName": "SMT_2T_Utilization"
91 },
92 {
93 "BriefDescription": "Fraction of cycles spent in Kernel mode",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
95 "MetricGroup": "Summary",
96 "MetricName": "Kernel_Utilization"
97 },
98 {
99 "BriefDescription": "C3 residency percent per core",
100 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
101 "MetricGroup": "Power",
102 "MetricName": "C3_Core_Residency"
103 },
104 {
105 "BriefDescription": "C6 residency percent per core",
106 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
107 "MetricGroup": "Power",
108 "MetricName": "C6_Core_Residency"
109 },
110 {
111 "BriefDescription": "C7 residency percent per core",
112 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
113 "MetricGroup": "Power",
114 "MetricName": "C7_Core_Residency"
115 },
116 {
117 "BriefDescription": "C2 residency percent per package",
118 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
119 "MetricGroup": "Power",
120 "MetricName": "C2_Pkg_Residency"
121 },
122 {
123 "BriefDescription": "C3 residency percent per package",
124 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C3_Pkg_Residency"
127 },
128 {
129 "BriefDescription": "C6 residency percent per package",
130 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C6_Pkg_Residency"
133 },
134 {
135 "BriefDescription": "C7 residency percent per package",
136 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C7_Pkg_Residency"
139 }
140]
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 4ea068366c3e..fe1a2c47cabf 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -9,6 +9,7 @@ GenuineIntel-6-27,v4,bonnell,core
9GenuineIntel-6-36,v4,bonnell,core 9GenuineIntel-6-36,v4,bonnell,core
10GenuineIntel-6-35,v4,bonnell,core 10GenuineIntel-6-35,v4,bonnell,core
11GenuineIntel-6-5C,v8,goldmont,core 11GenuineIntel-6-5C,v8,goldmont,core
12GenuineIntel-6-7A,v1,goldmontplus,core
12GenuineIntel-6-3C,v24,haswell,core 13GenuineIntel-6-3C,v24,haswell,core
13GenuineIntel-6-45,v24,haswell,core 14GenuineIntel-6-45,v24,haswell,core
14GenuineIntel-6-46,v24,haswell,core 15GenuineIntel-6-46,v24,haswell,core
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
new file mode 100644
index 000000000000..fd7d7c438226
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
@@ -0,0 +1,140 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15 "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23 "MetricGroup": "DSB; Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
64 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
65 "MetricGroup": "SMT",
66 "MetricName": "CORE_CLKS"
67 },
68 {
69 "BriefDescription": "Average CPU Utilization",
70 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
71 "MetricGroup": "Summary",
72 "MetricName": "CPU_Utilization"
73 },
74 {
75 "BriefDescription": "Giga Floating Point Operations Per Second",
76 "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
77 "MetricGroup": "FLOPS;Summary",
78 "MetricName": "GFLOPs"
79 },
80 {
81 "BriefDescription": "Average Frequency Utilization relative nominal frequency",
82 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
83 "MetricGroup": "Power",
84 "MetricName": "Turbo_Utilization"
85 },
86 {
87 "BriefDescription": "Fraction of cycles where both hardware threads were active",
88 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
89 "MetricGroup": "SMT;Summary",
90 "MetricName": "SMT_2T_Utilization"
91 },
92 {
93 "BriefDescription": "Fraction of cycles spent in Kernel mode",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
95 "MetricGroup": "Summary",
96 "MetricName": "Kernel_Utilization"
97 },
98 {
99 "BriefDescription": "C3 residency percent per core",
100 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
101 "MetricGroup": "Power",
102 "MetricName": "C3_Core_Residency"
103 },
104 {
105 "BriefDescription": "C6 residency percent per core",
106 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
107 "MetricGroup": "Power",
108 "MetricName": "C6_Core_Residency"
109 },
110 {
111 "BriefDescription": "C7 residency percent per core",
112 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
113 "MetricGroup": "Power",
114 "MetricName": "C7_Core_Residency"
115 },
116 {
117 "BriefDescription": "C2 residency percent per package",
118 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
119 "MetricGroup": "Power",
120 "MetricName": "C2_Pkg_Residency"
121 },
122 {
123 "BriefDescription": "C3 residency percent per package",
124 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C3_Pkg_Residency"
127 },
128 {
129 "BriefDescription": "C6 residency percent per package",
130 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C6_Pkg_Residency"
133 },
134 {
135 "BriefDescription": "C7 residency percent per package",
136 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C7_Pkg_Residency"
139 }
140]
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
new file mode 100644
index 000000000000..36c903faed0b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -0,0 +1,164 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15 "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23 "MetricGroup": "DSB; Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
64 "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)",
65 "MetricGroup": "Unknown_Branches",
66 "MetricName": "BAClear_Cost"
67 },
68 {
69 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
70 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
71 "MetricGroup": "SMT",
72 "MetricName": "CORE_CLKS"
73 },
74 {
75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
77 "MetricGroup": "Memory_Bound;Memory_Lat",
78 "MetricName": "Load_Miss_Real_Latency"
79 },
80 {
81 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
82 "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
83 "MetricGroup": "Memory_Bound;Memory_BW",
84 "MetricName": "MLP"
85 },
86 {
87 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
88 "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )",
89 "MetricGroup": "TLB",
90 "MetricName": "Page_Walks_Utilization"
91 },
92 {
93 "BriefDescription": "Average CPU Utilization",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
95 "MetricGroup": "Summary",
96 "MetricName": "CPU_Utilization"
97 },
98 {
99 "BriefDescription": "Giga Floating Point Operations Per Second",
100 "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
101 "MetricGroup": "FLOPS;Summary",
102 "MetricName": "GFLOPs"
103 },
104 {
105 "BriefDescription": "Average Frequency Utilization relative nominal frequency",
106 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
107 "MetricGroup": "Power",
108 "MetricName": "Turbo_Utilization"
109 },
110 {
111 "BriefDescription": "Fraction of cycles where both hardware threads were active",
112 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
113 "MetricGroup": "SMT;Summary",
114 "MetricName": "SMT_2T_Utilization"
115 },
116 {
117 "BriefDescription": "Fraction of cycles spent in Kernel mode",
118 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
119 "MetricGroup": "Summary",
120 "MetricName": "Kernel_Utilization"
121 },
122 {
123 "BriefDescription": "C3 residency percent per core",
124 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C3_Core_Residency"
127 },
128 {
129 "BriefDescription": "C6 residency percent per core",
130 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C6_Core_Residency"
133 },
134 {
135 "BriefDescription": "C7 residency percent per core",
136 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C7_Core_Residency"
139 },
140 {
141 "BriefDescription": "C2 residency percent per package",
142 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
143 "MetricGroup": "Power",
144 "MetricName": "C2_Pkg_Residency"
145 },
146 {
147 "BriefDescription": "C3 residency percent per package",
148 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
149 "MetricGroup": "Power",
150 "MetricName": "C3_Pkg_Residency"
151 },
152 {
153 "BriefDescription": "C6 residency percent per package",
154 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
155 "MetricGroup": "Power",
156 "MetricName": "C6_Pkg_Residency"
157 },
158 {
159 "BriefDescription": "C7 residency percent per package",
160 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
161 "MetricGroup": "Power",
162 "MetricName": "C7_Pkg_Residency"
163 }
164]
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
new file mode 100644
index 000000000000..36c903faed0b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -0,0 +1,164 @@
1[
2 {
3 "BriefDescription": "Instructions Per Cycle (per logical thread)",
4 "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5 "MetricGroup": "TopDownL1",
6 "MetricName": "IPC"
7 },
8 {
9 "BriefDescription": "Uops Per Instruction",
10 "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11 "MetricGroup": "Pipeline",
12 "MetricName": "UPI"
13 },
14 {
15 "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
16 "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
17 "MetricGroup": "Frontend",
18 "MetricName": "IFetch_Line_Utilization"
19 },
20 {
21 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22 "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23 "MetricGroup": "DSB; Frontend_Bandwidth",
24 "MetricName": "DSB_Coverage"
25 },
26 {
27 "BriefDescription": "Cycles Per Instruction (threaded)",
28 "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29 "MetricGroup": "Pipeline;Summary",
30 "MetricName": "CPI"
31 },
32 {
33 "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34 "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35 "MetricGroup": "Summary",
36 "MetricName": "CLKS"
37 },
38 {
39 "BriefDescription": "Total issue-pipeline slots",
40 "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41 "MetricGroup": "TopDownL1",
42 "MetricName": "SLOTS"
43 },
44 {
45 "BriefDescription": "Total number of retired Instructions",
46 "MetricExpr": "INST_RETIRED.ANY",
47 "MetricGroup": "Summary",
48 "MetricName": "Instructions"
49 },
50 {
51 "BriefDescription": "Instructions Per Cycle (per physical core)",
52 "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53 "MetricGroup": "SMT",
54 "MetricName": "CoreIPC"
55 },
56 {
57 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58 "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
59 "MetricGroup": "Pipeline;Ports_Utilization",
60 "MetricName": "ILP"
61 },
62 {
63 "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
64 "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)",
65 "MetricGroup": "Unknown_Branches",
66 "MetricName": "BAClear_Cost"
67 },
68 {
69 "BriefDescription": "Core actual clocks when any thread is active on the physical core",
70 "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
71 "MetricGroup": "SMT",
72 "MetricName": "CORE_CLKS"
73 },
74 {
75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
77 "MetricGroup": "Memory_Bound;Memory_Lat",
78 "MetricName": "Load_Miss_Real_Latency"
79 },
80 {
81 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
82 "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
83 "MetricGroup": "Memory_Bound;Memory_BW",
84 "MetricName": "MLP"
85 },
86 {
87 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
88 "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )",
89 "MetricGroup": "TLB",
90 "MetricName": "Page_Walks_Utilization"
91 },
92 {
93 "BriefDescription": "Average CPU Utilization",
94 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
95 "MetricGroup": "Summary",
96 "MetricName": "CPU_Utilization"
97 },
98 {
99 "BriefDescription": "Giga Floating Point Operations Per Second",
100 "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
101 "MetricGroup": "FLOPS;Summary",
102 "MetricName": "GFLOPs"
103 },
104 {
105 "BriefDescription": "Average Frequency Utilization relative nominal frequency",
106 "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
107 "MetricGroup": "Power",
108 "MetricName": "Turbo_Utilization"
109 },
110 {
111 "BriefDescription": "Fraction of cycles where both hardware threads were active",
112 "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
113 "MetricGroup": "SMT;Summary",
114 "MetricName": "SMT_2T_Utilization"
115 },
116 {
117 "BriefDescription": "Fraction of cycles spent in Kernel mode",
118 "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
119 "MetricGroup": "Summary",
120 "MetricName": "Kernel_Utilization"
121 },
122 {
123 "BriefDescription": "C3 residency percent per core",
124 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
125 "MetricGroup": "Power",
126 "MetricName": "C3_Core_Residency"
127 },
128 {
129 "BriefDescription": "C6 residency percent per core",
130 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
131 "MetricGroup": "Power",
132 "MetricName": "C6_Core_Residency"
133 },
134 {
135 "BriefDescription": "C7 residency percent per core",
136 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
137 "MetricGroup": "Power",
138 "MetricName": "C7_Core_Residency"
139 },
140 {
141 "BriefDescription": "C2 residency percent per package",
142 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
143 "MetricGroup": "Power",
144 "MetricName": "C2_Pkg_Residency"
145 },
146 {
147 "BriefDescription": "C3 residency percent per package",
148 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
149 "MetricGroup": "Power",
150 "MetricName": "C3_Pkg_Residency"
151 },
152 {
153 "BriefDescription": "C6 residency percent per package",
154 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
155 "MetricGroup": "Power",
156 "MetricName": "C6_Pkg_Residency"
157 },
158 {
159 "BriefDescription": "C7 residency percent per package",
160 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
161 "MetricGroup": "Power",
162 "MetricName": "C7_Pkg_Residency"
163 }
164]
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index d51dc9ca8861..9eb7047bafe4 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -292,7 +292,7 @@ static int print_events_table_entry(void *data, char *name, char *event,
292 char *desc, char *long_desc, 292 char *desc, char *long_desc,
293 char *pmu, char *unit, char *perpkg, 293 char *pmu, char *unit, char *perpkg,
294 char *metric_expr, 294 char *metric_expr,
295 char *metric_name) 295 char *metric_name, char *metric_group)
296{ 296{
297 struct perf_entry_data *pd = data; 297 struct perf_entry_data *pd = data;
298 FILE *outfp = pd->outfp; 298 FILE *outfp = pd->outfp;
@@ -304,8 +304,10 @@ static int print_events_table_entry(void *data, char *name, char *event,
304 */ 304 */
305 fprintf(outfp, "{\n"); 305 fprintf(outfp, "{\n");
306 306
307 fprintf(outfp, "\t.name = \"%s\",\n", name); 307 if (name)
308 fprintf(outfp, "\t.event = \"%s\",\n", event); 308 fprintf(outfp, "\t.name = \"%s\",\n", name);
309 if (event)
310 fprintf(outfp, "\t.event = \"%s\",\n", event);
309 fprintf(outfp, "\t.desc = \"%s\",\n", desc); 311 fprintf(outfp, "\t.desc = \"%s\",\n", desc);
310 fprintf(outfp, "\t.topic = \"%s\",\n", topic); 312 fprintf(outfp, "\t.topic = \"%s\",\n", topic);
311 if (long_desc && long_desc[0]) 313 if (long_desc && long_desc[0])
@@ -320,6 +322,8 @@ static int print_events_table_entry(void *data, char *name, char *event,
320 fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr); 322 fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr);
321 if (metric_name) 323 if (metric_name)
322 fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name); 324 fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name);
325 if (metric_group)
326 fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group);
323 fprintf(outfp, "},\n"); 327 fprintf(outfp, "},\n");
324 328
325 return 0; 329 return 0;
@@ -357,6 +361,9 @@ static char *real_event(const char *name, char *event)
357{ 361{
358 int i; 362 int i;
359 363
364 if (!name)
365 return NULL;
366
360 for (i = 0; fixed[i].name; i++) 367 for (i = 0; fixed[i].name; i++)
361 if (!strcasecmp(name, fixed[i].name)) 368 if (!strcasecmp(name, fixed[i].name))
362 return (char *)fixed[i].event; 369 return (char *)fixed[i].event;
@@ -369,7 +376,7 @@ int json_events(const char *fn,
369 char *long_desc, 376 char *long_desc,
370 char *pmu, char *unit, char *perpkg, 377 char *pmu, char *unit, char *perpkg,
371 char *metric_expr, 378 char *metric_expr,
372 char *metric_name), 379 char *metric_name, char *metric_group),
373 void *data) 380 void *data)
374{ 381{
375 int err = -EIO; 382 int err = -EIO;
@@ -397,6 +404,7 @@ int json_events(const char *fn,
397 char *unit = NULL; 404 char *unit = NULL;
398 char *metric_expr = NULL; 405 char *metric_expr = NULL;
399 char *metric_name = NULL; 406 char *metric_name = NULL;
407 char *metric_group = NULL;
400 unsigned long long eventcode = 0; 408 unsigned long long eventcode = 0;
401 struct msrmap *msr = NULL; 409 struct msrmap *msr = NULL;
402 jsmntok_t *msrval = NULL; 410 jsmntok_t *msrval = NULL;
@@ -476,6 +484,8 @@ int json_events(const char *fn,
476 addfield(map, &perpkg, "", "", val); 484 addfield(map, &perpkg, "", "", val);
477 } else if (json_streq(map, field, "MetricName")) { 485 } else if (json_streq(map, field, "MetricName")) {
478 addfield(map, &metric_name, "", "", val); 486 addfield(map, &metric_name, "", "", val);
487 } else if (json_streq(map, field, "MetricGroup")) {
488 addfield(map, &metric_group, "", "", val);
479 } else if (json_streq(map, field, "MetricExpr")) { 489 } else if (json_streq(map, field, "MetricExpr")) {
480 addfield(map, &metric_expr, "", "", val); 490 addfield(map, &metric_expr, "", "", val);
481 for (s = metric_expr; *s; s++) 491 for (s = metric_expr; *s; s++)
@@ -501,10 +511,11 @@ int json_events(const char *fn,
501 addfield(map, &event, ",", filter, NULL); 511 addfield(map, &event, ",", filter, NULL);
502 if (msr != NULL) 512 if (msr != NULL)
503 addfield(map, &event, ",", msr->pname, msrval); 513 addfield(map, &event, ",", msr->pname, msrval);
504 fixname(name); 514 if (name)
515 fixname(name);
505 516
506 err = func(data, name, real_event(name, event), desc, long_desc, 517 err = func(data, name, real_event(name, event), desc, long_desc,
507 pmu, unit, perpkg, metric_expr, metric_name); 518 pmu, unit, perpkg, metric_expr, metric_name, metric_group);
508 free(event); 519 free(event);
509 free(desc); 520 free(desc);
510 free(name); 521 free(name);
@@ -516,6 +527,7 @@ int json_events(const char *fn,
516 free(unit); 527 free(unit);
517 free(metric_expr); 528 free(metric_expr);
518 free(metric_name); 529 free(metric_name);
530 free(metric_group);
519 if (err) 531 if (err)
520 break; 532 break;
521 tok += j; 533 tok += j;
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
index d87efd2685b8..4684c673c445 100644
--- a/tools/perf/pmu-events/jevents.h
+++ b/tools/perf/pmu-events/jevents.h
@@ -7,7 +7,7 @@ int json_events(const char *fn,
7 char *long_desc, 7 char *long_desc,
8 char *pmu, 8 char *pmu,
9 char *unit, char *perpkg, char *metric_expr, 9 char *unit, char *perpkg, char *metric_expr,
10 char *metric_name), 10 char *metric_name, char *metric_group),
11 void *data); 11 void *data);
12char *get_cpu_str(void); 12char *get_cpu_str(void);
13 13
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index e08789ddfe6c..92a4d15ee0b9 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -16,6 +16,7 @@ struct pmu_event {
16 const char *perpkg; 16 const char *perpkg;
17 const char *metric_expr; 17 const char *metric_expr;
18 const char *metric_name; 18 const char *metric_name;
19 const char *metric_group;
19}; 20};
20 21
21/* 22/*
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index c180bbcdbef6..0e1367f90af5 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -167,7 +167,7 @@ static int run_dir(const char *d, const char *perf)
167 snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", 167 snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
168 d, d, perf, vcnt, v); 168 d, d, perf, vcnt, v);
169 169
170 return system(cmd); 170 return system(cmd) ? TEST_FAIL : TEST_OK;
171} 171}
172 172
173int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused) 173int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index 907b1b2f56ad..ff9b60b99f52 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -238,6 +238,7 @@ class Test(object):
238 # events in result. Fail if there's not any. 238 # events in result. Fail if there's not any.
239 for exp_name, exp_event in expect.items(): 239 for exp_name, exp_event in expect.items():
240 exp_list = [] 240 exp_list = []
241 res_event = {}
241 log.debug(" matching [%s]" % exp_name) 242 log.debug(" matching [%s]" % exp_name)
242 for res_name, res_event in result.items(): 243 for res_name, res_event in result.items():
243 log.debug(" to [%s]" % res_name) 244 log.debug(" to [%s]" % res_name)
@@ -254,7 +255,10 @@ class Test(object):
254 if exp_event.optional(): 255 if exp_event.optional():
255 log.debug(" %s does not match, but is optional" % exp_name) 256 log.debug(" %s does not match, but is optional" % exp_name)
256 else: 257 else:
257 exp_event.diff(res_event) 258 if not res_event:
259 log.debug(" res_event is empty");
260 else:
261 exp_event.diff(res_event)
258 raise Fail(self, 'match failure'); 262 raise Fail(self, 'match failure');
259 263
260 match[exp_name] = exp_list 264 match[exp_name] = exp_list
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 31e0b1da830b..37940665f736 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -23,7 +23,7 @@ comm=1
23freq=1 23freq=1
24inherit_stat=0 24inherit_stat=0
25enable_on_exec=1 25enable_on_exec=1
26task=0 26task=1
27watermark=0 27watermark=0
28precise_ip=0|1|2|3 28precise_ip=0|1|2|3
29mmap_data=0 29mmap_data=0
diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group
index 6e7961f6f7a5..618ba1c17474 100644
--- a/tools/perf/tests/attr/test-record-group
+++ b/tools/perf/tests/attr/test-record-group
@@ -17,5 +17,6 @@ sample_type=327
17read_format=4 17read_format=4
18mmap=0 18mmap=0
19comm=0 19comm=0
20task=0
20enable_on_exec=0 21enable_on_exec=0
21disabled=0 22disabled=0
diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling
index ef59afd6d635..f906b793196f 100644
--- a/tools/perf/tests/attr/test-record-group-sampling
+++ b/tools/perf/tests/attr/test-record-group-sampling
@@ -23,7 +23,7 @@ sample_type=343
23 23
24# PERF_FORMAT_ID | PERF_FORMAT_GROUP 24# PERF_FORMAT_ID | PERF_FORMAT_GROUP
25read_format=12 25read_format=12
26 26task=0
27mmap=0 27mmap=0
28comm=0 28comm=0
29enable_on_exec=0 29enable_on_exec=0
diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1
index 87a222d014d8..48e8bd12fe46 100644
--- a/tools/perf/tests/attr/test-record-group1
+++ b/tools/perf/tests/attr/test-record-group1
@@ -18,5 +18,6 @@ sample_type=327
18read_format=4 18read_format=4
19mmap=0 19mmap=0
20comm=0 20comm=0
21task=0
21enable_on_exec=0 22enable_on_exec=0
22disabled=0 23disabled=0
diff --git a/tools/perf/tests/attr/test-stat-C0 b/tools/perf/tests/attr/test-stat-C0
index 67717fe6a65d..a2c76d10b2bb 100644
--- a/tools/perf/tests/attr/test-stat-C0
+++ b/tools/perf/tests/attr/test-stat-C0
@@ -7,3 +7,4 @@ ret = 1
7# events are disabled by default when attached to cpu 7# events are disabled by default when attached to cpu
8disabled=1 8disabled=1
9enable_on_exec=0 9enable_on_exec=0
10optional=1
diff --git a/tools/perf/tests/attr/test-stat-basic b/tools/perf/tests/attr/test-stat-basic
index 74e17881f2ba..69867d049fda 100644
--- a/tools/perf/tests/attr/test-stat-basic
+++ b/tools/perf/tests/attr/test-stat-basic
@@ -4,3 +4,4 @@ args = -e cycles kill >/dev/null 2>&1
4ret = 1 4ret = 1
5 5
6[event:base-stat] 6[event:base-stat]
7optional=1
diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/attr/test-stat-default
index e911dbd4eb47..d9e99b3f77e6 100644
--- a/tools/perf/tests/attr/test-stat-default
+++ b/tools/perf/tests/attr/test-stat-default
@@ -32,6 +32,7 @@ config=2
32fd=5 32fd=5
33type=0 33type=0
34config=0 34config=0
35optional=1
35 36
36# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND 37# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
37[event6:base-stat] 38[event6:base-stat]
@@ -52,15 +53,18 @@ optional=1
52fd=8 53fd=8
53type=0 54type=0
54config=1 55config=1
56optional=1
55 57
56# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS 58# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
57[event9:base-stat] 59[event9:base-stat]
58fd=9 60fd=9
59type=0 61type=0
60config=4 62config=4
63optional=1
61 64
62# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES 65# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
63[event10:base-stat] 66[event10:base-stat]
64fd=10 67fd=10
65type=0 68type=0
66config=5 69config=5
70optional=1
diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/attr/test-stat-detailed-1
index b39270a08e74..8b04a055d154 100644
--- a/tools/perf/tests/attr/test-stat-detailed-1
+++ b/tools/perf/tests/attr/test-stat-detailed-1
@@ -33,6 +33,7 @@ config=2
33fd=5 33fd=5
34type=0 34type=0
35config=0 35config=0
36optional=1
36 37
37# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND 38# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
38[event6:base-stat] 39[event6:base-stat]
@@ -53,18 +54,21 @@ optional=1
53fd=8 54fd=8
54type=0 55type=0
55config=1 56config=1
57optional=1
56 58
57# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS 59# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
58[event9:base-stat] 60[event9:base-stat]
59fd=9 61fd=9
60type=0 62type=0
61config=4 63config=4
64optional=1
62 65
63# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES 66# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
64[event10:base-stat] 67[event10:base-stat]
65fd=10 68fd=10
66type=0 69type=0
67config=5 70config=5
71optional=1
68 72
69# PERF_TYPE_HW_CACHE / 73# PERF_TYPE_HW_CACHE /
70# PERF_COUNT_HW_CACHE_L1D << 0 | 74# PERF_COUNT_HW_CACHE_L1D << 0 |
@@ -74,6 +78,7 @@ config=5
74fd=11 78fd=11
75type=3 79type=3
76config=0 80config=0
81optional=1
77 82
78# PERF_TYPE_HW_CACHE / 83# PERF_TYPE_HW_CACHE /
79# PERF_COUNT_HW_CACHE_L1D << 0 | 84# PERF_COUNT_HW_CACHE_L1D << 0 |
@@ -83,6 +88,7 @@ config=0
83fd=12 88fd=12
84type=3 89type=3
85config=65536 90config=65536
91optional=1
86 92
87# PERF_TYPE_HW_CACHE / 93# PERF_TYPE_HW_CACHE /
88# PERF_COUNT_HW_CACHE_LL << 0 | 94# PERF_COUNT_HW_CACHE_LL << 0 |
@@ -92,6 +98,7 @@ config=65536
92fd=13 98fd=13
93type=3 99type=3
94config=2 100config=2
101optional=1
95 102
96# PERF_TYPE_HW_CACHE, 103# PERF_TYPE_HW_CACHE,
97# PERF_COUNT_HW_CACHE_LL << 0 | 104# PERF_COUNT_HW_CACHE_LL << 0 |
@@ -101,3 +108,4 @@ config=2
101fd=14 108fd=14
102type=3 109type=3
103config=65538 110config=65538
111optional=1
diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/attr/test-stat-detailed-2
index 45f8e6ea34f8..4fca9f1bfbf8 100644
--- a/tools/perf/tests/attr/test-stat-detailed-2
+++ b/tools/perf/tests/attr/test-stat-detailed-2
@@ -33,6 +33,7 @@ config=2
33fd=5 33fd=5
34type=0 34type=0
35config=0 35config=0
36optional=1
36 37
37# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND 38# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
38[event6:base-stat] 39[event6:base-stat]
@@ -53,18 +54,21 @@ optional=1
53fd=8 54fd=8
54type=0 55type=0
55config=1 56config=1
57optional=1
56 58
57# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS 59# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
58[event9:base-stat] 60[event9:base-stat]
59fd=9 61fd=9
60type=0 62type=0
61config=4 63config=4
64optional=1
62 65
63# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES 66# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
64[event10:base-stat] 67[event10:base-stat]
65fd=10 68fd=10
66type=0 69type=0
67config=5 70config=5
71optional=1
68 72
69# PERF_TYPE_HW_CACHE / 73# PERF_TYPE_HW_CACHE /
70# PERF_COUNT_HW_CACHE_L1D << 0 | 74# PERF_COUNT_HW_CACHE_L1D << 0 |
@@ -74,6 +78,7 @@ config=5
74fd=11 78fd=11
75type=3 79type=3
76config=0 80config=0
81optional=1
77 82
78# PERF_TYPE_HW_CACHE / 83# PERF_TYPE_HW_CACHE /
79# PERF_COUNT_HW_CACHE_L1D << 0 | 84# PERF_COUNT_HW_CACHE_L1D << 0 |
@@ -83,6 +88,7 @@ config=0
83fd=12 88fd=12
84type=3 89type=3
85config=65536 90config=65536
91optional=1
86 92
87# PERF_TYPE_HW_CACHE / 93# PERF_TYPE_HW_CACHE /
88# PERF_COUNT_HW_CACHE_LL << 0 | 94# PERF_COUNT_HW_CACHE_LL << 0 |
@@ -92,6 +98,7 @@ config=65536
92fd=13 98fd=13
93type=3 99type=3
94config=2 100config=2
101optional=1
95 102
96# PERF_TYPE_HW_CACHE, 103# PERF_TYPE_HW_CACHE,
97# PERF_COUNT_HW_CACHE_LL << 0 | 104# PERF_COUNT_HW_CACHE_LL << 0 |
@@ -101,6 +108,7 @@ config=2
101fd=14 108fd=14
102type=3 109type=3
103config=65538 110config=65538
111optional=1
104 112
105# PERF_TYPE_HW_CACHE, 113# PERF_TYPE_HW_CACHE,
106# PERF_COUNT_HW_CACHE_L1I << 0 | 114# PERF_COUNT_HW_CACHE_L1I << 0 |
@@ -120,6 +128,7 @@ optional=1
120fd=16 128fd=16
121type=3 129type=3
122config=65537 130config=65537
131optional=1
123 132
124# PERF_TYPE_HW_CACHE, 133# PERF_TYPE_HW_CACHE,
125# PERF_COUNT_HW_CACHE_DTLB << 0 | 134# PERF_COUNT_HW_CACHE_DTLB << 0 |
@@ -129,6 +138,7 @@ config=65537
129fd=17 138fd=17
130type=3 139type=3
131config=3 140config=3
141optional=1
132 142
133# PERF_TYPE_HW_CACHE, 143# PERF_TYPE_HW_CACHE,
134# PERF_COUNT_HW_CACHE_DTLB << 0 | 144# PERF_COUNT_HW_CACHE_DTLB << 0 |
@@ -138,6 +148,7 @@ config=3
138fd=18 148fd=18
139type=3 149type=3
140config=65539 150config=65539
151optional=1
141 152
142# PERF_TYPE_HW_CACHE, 153# PERF_TYPE_HW_CACHE,
143# PERF_COUNT_HW_CACHE_ITLB << 0 | 154# PERF_COUNT_HW_CACHE_ITLB << 0 |
@@ -147,6 +158,7 @@ config=65539
147fd=19 158fd=19
148type=3 159type=3
149config=4 160config=4
161optional=1
150 162
151# PERF_TYPE_HW_CACHE, 163# PERF_TYPE_HW_CACHE,
152# PERF_COUNT_HW_CACHE_ITLB << 0 | 164# PERF_COUNT_HW_CACHE_ITLB << 0 |
@@ -156,3 +168,4 @@ config=4
156fd=20 168fd=20
157type=3 169type=3
158config=65540 170config=65540
171optional=1
diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/attr/test-stat-detailed-3
index 30ae0fb7a3fd..4bb58e1c82a6 100644
--- a/tools/perf/tests/attr/test-stat-detailed-3
+++ b/tools/perf/tests/attr/test-stat-detailed-3
@@ -33,6 +33,7 @@ config=2
33fd=5 33fd=5
34type=0 34type=0
35config=0 35config=0
36optional=1
36 37
37# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND 38# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
38[event6:base-stat] 39[event6:base-stat]
@@ -53,18 +54,21 @@ optional=1
53fd=8 54fd=8
54type=0 55type=0
55config=1 56config=1
57optional=1
56 58
57# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS 59# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
58[event9:base-stat] 60[event9:base-stat]
59fd=9 61fd=9
60type=0 62type=0
61config=4 63config=4
64optional=1
62 65
63# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES 66# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
64[event10:base-stat] 67[event10:base-stat]
65fd=10 68fd=10
66type=0 69type=0
67config=5 70config=5
71optional=1
68 72
69# PERF_TYPE_HW_CACHE / 73# PERF_TYPE_HW_CACHE /
70# PERF_COUNT_HW_CACHE_L1D << 0 | 74# PERF_COUNT_HW_CACHE_L1D << 0 |
@@ -74,6 +78,7 @@ config=5
74fd=11 78fd=11
75type=3 79type=3
76config=0 80config=0
81optional=1
77 82
78# PERF_TYPE_HW_CACHE / 83# PERF_TYPE_HW_CACHE /
79# PERF_COUNT_HW_CACHE_L1D << 0 | 84# PERF_COUNT_HW_CACHE_L1D << 0 |
@@ -83,6 +88,7 @@ config=0
83fd=12 88fd=12
84type=3 89type=3
85config=65536 90config=65536
91optional=1
86 92
87# PERF_TYPE_HW_CACHE / 93# PERF_TYPE_HW_CACHE /
88# PERF_COUNT_HW_CACHE_LL << 0 | 94# PERF_COUNT_HW_CACHE_LL << 0 |
@@ -92,6 +98,7 @@ config=65536
92fd=13 98fd=13
93type=3 99type=3
94config=2 100config=2
101optional=1
95 102
96# PERF_TYPE_HW_CACHE, 103# PERF_TYPE_HW_CACHE,
97# PERF_COUNT_HW_CACHE_LL << 0 | 104# PERF_COUNT_HW_CACHE_LL << 0 |
@@ -101,6 +108,7 @@ config=2
101fd=14 108fd=14
102type=3 109type=3
103config=65538 110config=65538
111optional=1
104 112
105# PERF_TYPE_HW_CACHE, 113# PERF_TYPE_HW_CACHE,
106# PERF_COUNT_HW_CACHE_L1I << 0 | 114# PERF_COUNT_HW_CACHE_L1I << 0 |
@@ -120,6 +128,7 @@ optional=1
120fd=16 128fd=16
121type=3 129type=3
122config=65537 130config=65537
131optional=1
123 132
124# PERF_TYPE_HW_CACHE, 133# PERF_TYPE_HW_CACHE,
125# PERF_COUNT_HW_CACHE_DTLB << 0 | 134# PERF_COUNT_HW_CACHE_DTLB << 0 |
@@ -129,6 +138,7 @@ config=65537
129fd=17 138fd=17
130type=3 139type=3
131config=3 140config=3
141optional=1
132 142
133# PERF_TYPE_HW_CACHE, 143# PERF_TYPE_HW_CACHE,
134# PERF_COUNT_HW_CACHE_DTLB << 0 | 144# PERF_COUNT_HW_CACHE_DTLB << 0 |
@@ -138,6 +148,7 @@ config=3
138fd=18 148fd=18
139type=3 149type=3
140config=65539 150config=65539
151optional=1
141 152
142# PERF_TYPE_HW_CACHE, 153# PERF_TYPE_HW_CACHE,
143# PERF_COUNT_HW_CACHE_ITLB << 0 | 154# PERF_COUNT_HW_CACHE_ITLB << 0 |
@@ -147,6 +158,7 @@ config=65539
147fd=19 158fd=19
148type=3 159type=3
149config=4 160config=4
161optional=1
150 162
151# PERF_TYPE_HW_CACHE, 163# PERF_TYPE_HW_CACHE,
152# PERF_COUNT_HW_CACHE_ITLB << 0 | 164# PERF_COUNT_HW_CACHE_ITLB << 0 |
@@ -156,6 +168,7 @@ config=4
156fd=20 168fd=20
157type=3 169type=3
158config=65540 170config=65540
171optional=1
159 172
160# PERF_TYPE_HW_CACHE, 173# PERF_TYPE_HW_CACHE,
161# PERF_COUNT_HW_CACHE_L1D << 0 | 174# PERF_COUNT_HW_CACHE_L1D << 0 |
diff --git a/tools/perf/tests/attr/test-stat-group b/tools/perf/tests/attr/test-stat-group
index fdc1596a8862..e15d6946e9b3 100644
--- a/tools/perf/tests/attr/test-stat-group
+++ b/tools/perf/tests/attr/test-stat-group
@@ -6,6 +6,7 @@ ret = 1
6[event-1:base-stat] 6[event-1:base-stat]
7fd=1 7fd=1
8group_fd=-1 8group_fd=-1
9read_format=3|15
9 10
10[event-2:base-stat] 11[event-2:base-stat]
11fd=2 12fd=2
@@ -13,3 +14,4 @@ group_fd=1
13config=1 14config=1
14disabled=0 15disabled=0
15enable_on_exec=0 16enable_on_exec=0
17read_format=3|15
diff --git a/tools/perf/tests/attr/test-stat-group1 b/tools/perf/tests/attr/test-stat-group1
index 2a1f86e4a904..1746751123dc 100644
--- a/tools/perf/tests/attr/test-stat-group1
+++ b/tools/perf/tests/attr/test-stat-group1
@@ -6,6 +6,7 @@ ret = 1
6[event-1:base-stat] 6[event-1:base-stat]
7fd=1 7fd=1
8group_fd=-1 8group_fd=-1
9read_format=3|15
9 10
10[event-2:base-stat] 11[event-2:base-stat]
11fd=2 12fd=2
@@ -13,3 +14,4 @@ group_fd=1
13config=1 14config=1
14disabled=0 15disabled=0
15enable_on_exec=0 16enable_on_exec=0
17read_format=3|15
diff --git a/tools/perf/tests/attr/test-stat-no-inherit b/tools/perf/tests/attr/test-stat-no-inherit
index d54b2a1e3e28..924fbb9300d1 100644
--- a/tools/perf/tests/attr/test-stat-no-inherit
+++ b/tools/perf/tests/attr/test-stat-no-inherit
@@ -5,3 +5,4 @@ ret = 1
5 5
6[event:base-stat] 6[event:base-stat]
7inherit=0 7inherit=0
8optional=1
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 53d06f37406a..766573e236e4 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -4,6 +4,7 @@
4 * 4 *
5 * Builtin regression testing command: ever growing number of sanity tests 5 * Builtin regression testing command: ever growing number of sanity tests
6 */ 6 */
7#include <fcntl.h>
7#include <errno.h> 8#include <errno.h>
8#include <unistd.h> 9#include <unistd.h>
9#include <string.h> 10#include <string.h>
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 3c3f3e029e33..868d82b501f4 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -132,7 +132,7 @@ static int synth_all(struct machine *machine)
132{ 132{
133 return perf_event__synthesize_threads(NULL, 133 return perf_event__synthesize_threads(NULL,
134 perf_event__process, 134 perf_event__process,
135 machine, 0, 500); 135 machine, 0, 500, 1);
136} 136}
137 137
138static int synth_process(struct machine *machine) 138static int synth_process(struct machine *machine)
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index a59db7c45a65..17cb1bb3448c 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -30,12 +30,14 @@ static int get_temp(char *path)
30static int session_write_header(char *path) 30static int session_write_header(char *path)
31{ 31{
32 struct perf_session *session; 32 struct perf_session *session;
33 struct perf_data_file file = { 33 struct perf_data data = {
34 .path = path, 34 .file = {
35 .mode = PERF_DATA_MODE_WRITE, 35 .path = path,
36 },
37 .mode = PERF_DATA_MODE_WRITE,
36 }; 38 };
37 39
38 session = perf_session__new(&file, false, NULL); 40 session = perf_session__new(&data, false, NULL);
39 TEST_ASSERT_VAL("can't get session", session); 41 TEST_ASSERT_VAL("can't get session", session);
40 42
41 session->evlist = perf_evlist__new_default(); 43 session->evlist = perf_evlist__new_default();
@@ -47,7 +49,7 @@ static int session_write_header(char *path)
47 session->header.data_size += DATA_SIZE; 49 session->header.data_size += DATA_SIZE;
48 50
49 TEST_ASSERT_VAL("failed to write header", 51 TEST_ASSERT_VAL("failed to write header",
50 !perf_session__write_header(session, session->evlist, file.fd, true)); 52 !perf_session__write_header(session, session->evlist, data.file.fd, true));
51 53
52 perf_session__delete(session); 54 perf_session__delete(session);
53 55
@@ -57,13 +59,15 @@ static int session_write_header(char *path)
57static int check_cpu_topology(char *path, struct cpu_map *map) 59static int check_cpu_topology(char *path, struct cpu_map *map)
58{ 60{
59 struct perf_session *session; 61 struct perf_session *session;
60 struct perf_data_file file = { 62 struct perf_data data = {
61 .path = path, 63 .file = {
62 .mode = PERF_DATA_MODE_READ, 64 .path = path,
65 },
66 .mode = PERF_DATA_MODE_READ,
63 }; 67 };
64 int i; 68 int i;
65 69
66 session = perf_session__new(&file, false, NULL); 70 session = perf_session__new(&data, false, NULL);
67 TEST_ASSERT_VAL("can't get session", session); 71 TEST_ASSERT_VAL("can't get session", session);
68 72
69 for (i = 0; i < session->header.env.nr_cpus_avail; i++) { 73 for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index 175d633c6b49..066bbf0f4a74 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -3,5 +3,7 @@ libperf-y += fcntl.o
3ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) 3ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
4libperf-y += ioctl.o 4libperf-y += ioctl.o
5endif 5endif
6libperf-y += kcmp.o
6libperf-y += pkey_alloc.o 7libperf-y += pkey_alloc.o
8libperf-y += prctl.o
7libperf-y += statx.o 9libperf-y += statx.o
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index d80655cd1881..a6dfd04beaee 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -4,6 +4,7 @@
4 4
5#include <linux/kernel.h> 5#include <linux/kernel.h>
6#include <linux/types.h> 6#include <linux/types.h>
7#include <sys/types.h>
7 8
8struct strarray { 9struct strarray {
9 int offset; 10 int offset;
@@ -27,6 +28,8 @@ size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const cha
27struct trace; 28struct trace;
28struct thread; 29struct thread;
29 30
31size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size);
32
30/** 33/**
31 * @val: value of syscall argument being formatted 34 * @val: value of syscall argument being formatted
32 * @args: All the args, use syscall_args__val(arg, nth) to access one 35 * @args: All the args, use syscall_args__val(arg, nth) to access one
@@ -79,12 +82,27 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar
79size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg); 82size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg);
80#define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd 83#define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd
81 84
85size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_arg *arg);
86#define SCA_KCMP_TYPE syscall_arg__scnprintf_kcmp_type
87
88size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg);
89#define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx
90
82size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); 91size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg);
83#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights 92#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights
84 93
85size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg); 94size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg);
86#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 95#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
87 96
97size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg);
98#define SCA_PRCTL_OPTION syscall_arg__scnprintf_prctl_option
99
100size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg);
101#define SCA_PRCTL_ARG2 syscall_arg__scnprintf_prctl_arg2
102
103size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg);
104#define SCA_PRCTL_ARG3 syscall_arg__scnprintf_prctl_arg3
105
88size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); 106size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg);
89#define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags 107#define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags
90 108
diff --git a/tools/perf/trace/beauty/kcmp.c b/tools/perf/trace/beauty/kcmp.c
new file mode 100644
index 000000000000..f62040eb9d5c
--- /dev/null
+++ b/tools/perf/trace/beauty/kcmp.c
@@ -0,0 +1,44 @@
1/*
2 * trace/beauty/kcmp.c
3 *
4 * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
5 *
6 * Released under the GPL v2. (and only v2, not any later version)
7 */
8
9#include "trace/beauty/beauty.h"
10#include <linux/kernel.h>
11#include <sys/types.h>
12#include <machine.h>
13#include <uapi/linux/kcmp.h>
14
15#include "trace/beauty/generated/kcmp_type_array.c"
16
17size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg)
18{
19 unsigned long fd = arg->val;
20 int type = syscall_arg__val(arg, 2);
21 pid_t pid;
22
23 if (type != KCMP_FILE)
24 return syscall_arg__scnprintf_long(bf, size, arg);
25
26 pid = syscall_arg__val(arg, arg->idx == 3 ? 0 : 1); /* idx1 -> pid1, idx2 -> pid2 */
27 return pid__scnprintf_fd(arg->trace, pid, fd, bf, size);
28}
29
30static size_t kcmp__scnprintf_type(int type, char *bf, size_t size)
31{
32 static DEFINE_STRARRAY(kcmp_types);
33 return strarray__scnprintf(&strarray__kcmp_types, bf, size, "%d", type);
34}
35
36size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_arg *arg)
37{
38 unsigned long type = arg->val;
39
40 if (type != KCMP_FILE)
41 arg->mask |= (1 << 3) | (1 << 4); /* Ignore idx1 and idx2 */
42
43 return kcmp__scnprintf_type(type, bf, size);
44}
diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh
new file mode 100755
index 000000000000..40d063b8c082
--- /dev/null
+++ b/tools/perf/trace/beauty/kcmp_type.sh
@@ -0,0 +1,10 @@
1#!/bin/sh
2
3header_dir=$1
4
5printf "static const char *kcmp_types[] = {\n"
6regex='^[[:space:]]+(KCMP_(\w+)),'
7egrep $regex ${header_dir}/kcmp.h | grep -v KCMP_TYPES, | \
8 sed -r "s/$regex/\1 \2/g" | \
9 xargs printf "\t[%s]\t= \"%s\",\n"
10printf "};\n"
diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh
new file mode 100755
index 000000000000..60ef8640ee70
--- /dev/null
+++ b/tools/perf/trace/beauty/madvise_behavior.sh
@@ -0,0 +1,10 @@
1#!/bin/sh
2
3header_dir=$1
4
5printf "static const char *madvise_advices[] = {\n"
6regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MADV_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
7egrep $regex ${header_dir}/mman-common.h | \
8 sed -r "s/$regex/\2 \1/g" | \
9 sort -n | xargs printf "\t[%s] = \"%s\",\n"
10printf "};\n"
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 51f1cea406f5..9e1668b2c5d7 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -95,35 +95,21 @@ static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
95 95
96#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags 96#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
97 97
98static size_t madvise__scnprintf_behavior(int behavior, char *bf, size_t size)
99{
100#include "trace/beauty/generated/madvise_behavior_array.c"
101 static DEFINE_STRARRAY(madvise_advices);
102
103 if (behavior < strarray__madvise_advices.nr_entries && strarray__madvise_advices.entries[behavior] != NULL)
104 return scnprintf(bf, size, "MADV_%s", strarray__madvise_advices.entries[behavior]);
105
106 return scnprintf(bf, size, "%#", behavior);
107}
108
98static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, 109static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
99 struct syscall_arg *arg) 110 struct syscall_arg *arg)
100{ 111{
101 int behavior = arg->val; 112 return madvise__scnprintf_behavior(arg->val, bf, size);
102
103 switch (behavior) {
104#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
105 P_MADV_BHV(NORMAL);
106 P_MADV_BHV(RANDOM);
107 P_MADV_BHV(SEQUENTIAL);
108 P_MADV_BHV(WILLNEED);
109 P_MADV_BHV(DONTNEED);
110 P_MADV_BHV(FREE);
111 P_MADV_BHV(REMOVE);
112 P_MADV_BHV(DONTFORK);
113 P_MADV_BHV(DOFORK);
114 P_MADV_BHV(HWPOISON);
115 P_MADV_BHV(SOFT_OFFLINE);
116 P_MADV_BHV(MERGEABLE);
117 P_MADV_BHV(UNMERGEABLE);
118 P_MADV_BHV(HUGEPAGE);
119 P_MADV_BHV(NOHUGEPAGE);
120 P_MADV_BHV(DONTDUMP);
121 P_MADV_BHV(DODUMP);
122#undef P_MADV_BHV
123 default: break;
124 }
125
126 return scnprintf(bf, size, "%#x", behavior);
127} 113}
128 114
129#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior 115#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c
new file mode 100644
index 000000000000..246130dad6c4
--- /dev/null
+++ b/tools/perf/trace/beauty/prctl.c
@@ -0,0 +1,82 @@
1/*
2 * trace/beauty/prctl.c
3 *
4 * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
5 *
6 * Released under the GPL v2. (and only v2, not any later version)
7 */
8
9#include "trace/beauty/beauty.h"
10#include <linux/kernel.h>
11#include <uapi/linux/prctl.h>
12
13#include "trace/beauty/generated/prctl_option_array.c"
14
15static size_t prctl__scnprintf_option(int option, char *bf, size_t size)
16{
17 static DEFINE_STRARRAY(prctl_options);
18 return strarray__scnprintf(&strarray__prctl_options, bf, size, "%d", option);
19}
20
21static size_t prctl__scnprintf_set_mm(int option, char *bf, size_t size)
22{
23 static DEFINE_STRARRAY(prctl_set_mm_options);
24 return strarray__scnprintf(&strarray__prctl_set_mm_options, bf, size, "%d", option);
25}
26
27size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg)
28{
29 int option = syscall_arg__val(arg, 0);
30
31 if (option == PR_SET_MM)
32 return prctl__scnprintf_set_mm(arg->val, bf, size);
33 /*
34 * We still don't grab the contents of pointers on entry or exit,
35 * so just print them as hex numbers
36 */
37 if (option == PR_SET_NAME)
38 return syscall_arg__scnprintf_hex(bf, size, arg);
39
40 return syscall_arg__scnprintf_long(bf, size, arg);
41}
42
43size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg)
44{
45 int option = syscall_arg__val(arg, 0);
46
47 if (option == PR_SET_MM)
48 return syscall_arg__scnprintf_hex(bf, size, arg);
49
50 return syscall_arg__scnprintf_long(bf, size, arg);
51}
52
53size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg)
54{
55 unsigned long option = arg->val;
56 enum {
57 SPO_ARG2 = (1 << 1),
58 SPO_ARG3 = (1 << 2),
59 SPO_ARG4 = (1 << 3),
60 SPO_ARG5 = (1 << 4),
61 SPO_ARG6 = (1 << 5),
62 };
63 const u8 all_but2 = SPO_ARG3 | SPO_ARG4 | SPO_ARG5 | SPO_ARG6;
64 const u8 all = SPO_ARG2 | all_but2;
65 const u8 masks[] = {
66 [PR_GET_DUMPABLE] = all,
67 [PR_SET_DUMPABLE] = all_but2,
68 [PR_SET_NAME] = all_but2,
69 [PR_GET_CHILD_SUBREAPER] = all_but2,
70 [PR_SET_CHILD_SUBREAPER] = all_but2,
71 [PR_GET_SECUREBITS] = all,
72 [PR_SET_SECUREBITS] = all_but2,
73 [PR_SET_MM] = SPO_ARG4 | SPO_ARG5 | SPO_ARG6,
74 [PR_GET_PDEATHSIG] = all,
75 [PR_SET_PDEATHSIG] = all_but2,
76 };
77
78 if (option < ARRAY_SIZE(masks))
79 arg->mask |= masks[option];
80
81 return prctl__scnprintf_option(option, bf, size);
82}
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
new file mode 100755
index 000000000000..0be4138fbe71
--- /dev/null
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -0,0 +1,17 @@
1#!/bin/sh
2
3header_dir=$1
4
5printf "static const char *prctl_options[] = {\n"
6regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*'
7egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
8 sed -r "s/$regex/\2 \1/g" | \
9 sort -n | xargs printf "\t[%s] = \"%s\",\n"
10printf "};\n"
11
12printf "static const char *prctl_set_mm_options[] = {\n"
13regex='^#[[:space:]]+define[[:space:]]+PR_SET_MM_(\w+)[[:space:]]*([[:digit:]]+).*'
14egrep $regex ${header_dir}/prctl.h | \
15 sed -r "s/$regex/\2 \1/g" | \
16 sort -n | xargs printf "\t[%s] = \"%s\",\n"
17printf "};\n"
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 628ad5f7eddb..68146f4620a5 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -155,57 +155,9 @@ static void callchain_list__set_folding(struct callchain_list *cl, bool unfold)
155 cl->unfolded = unfold ? cl->has_children : false; 155 cl->unfolded = unfold ? cl->has_children : false;
156} 156}
157 157
158static struct inline_node *inline_node__create(struct map *map, u64 ip)
159{
160 struct dso *dso;
161 struct inline_node *node;
162
163 if (map == NULL)
164 return NULL;
165
166 dso = map->dso;
167 if (dso == NULL)
168 return NULL;
169
170 node = dso__parse_addr_inlines(dso,
171 map__rip_2objdump(map, ip));
172
173 return node;
174}
175
176static int inline__count_rows(struct inline_node *node)
177{
178 struct inline_list *ilist;
179 int i = 0;
180
181 if (node == NULL)
182 return 0;
183
184 list_for_each_entry(ilist, &node->val, list) {
185 if ((ilist->filename != NULL) || (ilist->funcname != NULL))
186 i++;
187 }
188
189 return i;
190}
191
192static int callchain_list__inline_rows(struct callchain_list *chain)
193{
194 struct inline_node *node;
195 int rows;
196
197 node = inline_node__create(chain->ms.map, chain->ip);
198 if (node == NULL)
199 return 0;
200
201 rows = inline__count_rows(node);
202 inline_node__delete(node);
203 return rows;
204}
205
206static int callchain_node__count_rows_rb_tree(struct callchain_node *node) 158static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
207{ 159{
208 int n = 0, inline_rows; 160 int n = 0;
209 struct rb_node *nd; 161 struct rb_node *nd;
210 162
211 for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) { 163 for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) {
@@ -216,12 +168,6 @@ static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
216 list_for_each_entry(chain, &child->val, list) { 168 list_for_each_entry(chain, &child->val, list) {
217 ++n; 169 ++n;
218 170
219 if (symbol_conf.inline_name) {
220 inline_rows =
221 callchain_list__inline_rows(chain);
222 n += inline_rows;
223 }
224
225 /* We need this because we may not have children */ 171 /* We need this because we may not have children */
226 folded_sign = callchain_list__folded(chain); 172 folded_sign = callchain_list__folded(chain);
227 if (folded_sign == '+') 173 if (folded_sign == '+')
@@ -273,7 +219,7 @@ static int callchain_node__count_rows(struct callchain_node *node)
273{ 219{
274 struct callchain_list *chain; 220 struct callchain_list *chain;
275 bool unfolded = false; 221 bool unfolded = false;
276 int n = 0, inline_rows; 222 int n = 0;
277 223
278 if (callchain_param.mode == CHAIN_FLAT) 224 if (callchain_param.mode == CHAIN_FLAT)
279 return callchain_node__count_flat_rows(node); 225 return callchain_node__count_flat_rows(node);
@@ -282,10 +228,6 @@ static int callchain_node__count_rows(struct callchain_node *node)
282 228
283 list_for_each_entry(chain, &node->val, list) { 229 list_for_each_entry(chain, &node->val, list) {
284 ++n; 230 ++n;
285 if (symbol_conf.inline_name) {
286 inline_rows = callchain_list__inline_rows(chain);
287 n += inline_rows;
288 }
289 231
290 unfolded = chain->unfolded; 232 unfolded = chain->unfolded;
291 } 233 }
@@ -433,19 +375,6 @@ static void hist_entry__init_have_children(struct hist_entry *he)
433 he->init_have_children = true; 375 he->init_have_children = true;
434} 376}
435 377
436static void hist_entry_init_inline_node(struct hist_entry *he)
437{
438 if (he->inline_node)
439 return;
440
441 he->inline_node = inline_node__create(he->ms.map, he->ip);
442
443 if (he->inline_node == NULL)
444 return;
445
446 he->has_children = true;
447}
448
449static bool hist_browser__toggle_fold(struct hist_browser *browser) 378static bool hist_browser__toggle_fold(struct hist_browser *browser)
450{ 379{
451 struct hist_entry *he = browser->he_selection; 380 struct hist_entry *he = browser->he_selection;
@@ -477,12 +406,8 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser)
477 406
478 if (he->unfolded) { 407 if (he->unfolded) {
479 if (he->leaf) 408 if (he->leaf)
480 if (he->inline_node) 409 he->nr_rows = callchain__count_rows(
481 he->nr_rows = inline__count_rows( 410 &he->sorted_chain);
482 he->inline_node);
483 else
484 he->nr_rows = callchain__count_rows(
485 &he->sorted_chain);
486 else 411 else
487 he->nr_rows = hierarchy_count_rows(browser, he, false); 412 he->nr_rows = hierarchy_count_rows(browser, he, false);
488 413
@@ -842,71 +767,6 @@ static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_u
842 767
843#define LEVEL_OFFSET_STEP 3 768#define LEVEL_OFFSET_STEP 3
844 769
845static int hist_browser__show_inline(struct hist_browser *browser,
846 struct inline_node *node,
847 unsigned short row,
848 int offset)
849{
850 struct inline_list *ilist;
851 char buf[1024];
852 int color, width, first_row;
853
854 first_row = row;
855 width = browser->b.width - (LEVEL_OFFSET_STEP + 2);
856 list_for_each_entry(ilist, &node->val, list) {
857 if ((ilist->filename != NULL) || (ilist->funcname != NULL)) {
858 color = HE_COLORSET_NORMAL;
859 if (ui_browser__is_current_entry(&browser->b, row))
860 color = HE_COLORSET_SELECTED;
861
862 if (callchain_param.key == CCKEY_ADDRESS ||
863 callchain_param.key == CCKEY_SRCLINE) {
864 if (ilist->filename != NULL)
865 scnprintf(buf, sizeof(buf),
866 "%s:%d (inline)",
867 ilist->filename,
868 ilist->line_nr);
869 else
870 scnprintf(buf, sizeof(buf), "??");
871 } else if (ilist->funcname != NULL)
872 scnprintf(buf, sizeof(buf), "%s (inline)",
873 ilist->funcname);
874 else if (ilist->filename != NULL)
875 scnprintf(buf, sizeof(buf),
876 "%s:%d (inline)",
877 ilist->filename,
878 ilist->line_nr);
879 else
880 scnprintf(buf, sizeof(buf), "??");
881
882 ui_browser__set_color(&browser->b, color);
883 hist_browser__gotorc(browser, row, 0);
884 ui_browser__write_nstring(&browser->b, " ",
885 LEVEL_OFFSET_STEP + offset);
886 ui_browser__write_nstring(&browser->b, buf, width);
887 row++;
888 }
889 }
890
891 return row - first_row;
892}
893
894static size_t show_inline_list(struct hist_browser *browser, struct map *map,
895 u64 ip, int row, int offset)
896{
897 struct inline_node *node;
898 int ret;
899
900 node = inline_node__create(map, ip);
901 if (node == NULL)
902 return 0;
903
904 ret = hist_browser__show_inline(browser, node, row, offset);
905
906 inline_node__delete(node);
907 return ret;
908}
909
910static int hist_browser__show_callchain_list(struct hist_browser *browser, 770static int hist_browser__show_callchain_list(struct hist_browser *browser,
911 struct callchain_node *node, 771 struct callchain_node *node,
912 struct callchain_list *chain, 772 struct callchain_list *chain,
@@ -918,7 +778,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
918 char bf[1024], *alloc_str; 778 char bf[1024], *alloc_str;
919 char buf[64], *alloc_str2; 779 char buf[64], *alloc_str2;
920 const char *str; 780 const char *str;
921 int inline_rows = 0, ret = 1; 781 int ret = 1;
922 782
923 if (arg->row_offset != 0) { 783 if (arg->row_offset != 0) {
924 arg->row_offset--; 784 arg->row_offset--;
@@ -955,12 +815,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
955 free(alloc_str); 815 free(alloc_str);
956 free(alloc_str2); 816 free(alloc_str2);
957 817
958 if (symbol_conf.inline_name) { 818 return ret;
959 inline_rows = show_inline_list(browser, chain->ms.map,
960 chain->ip, row + 1, offset);
961 }
962
963 return ret + inline_rows;
964} 819}
965 820
966static bool check_percent_display(struct rb_node *node, u64 parent_total) 821static bool check_percent_display(struct rb_node *node, u64 parent_total)
@@ -1384,12 +1239,6 @@ static int hist_browser__show_entry(struct hist_browser *browser,
1384 folded_sign = hist_entry__folded(entry); 1239 folded_sign = hist_entry__folded(entry);
1385 } 1240 }
1386 1241
1387 if (symbol_conf.inline_name &&
1388 (!entry->has_children)) {
1389 hist_entry_init_inline_node(entry);
1390 folded_sign = hist_entry__folded(entry);
1391 }
1392
1393 if (row_offset == 0) { 1242 if (row_offset == 0) {
1394 struct hpp_arg arg = { 1243 struct hpp_arg arg = {
1395 .b = &browser->b, 1244 .b = &browser->b,
@@ -1421,8 +1270,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
1421 } 1270 }
1422 1271
1423 if (first) { 1272 if (first) {
1424 if (symbol_conf.use_callchain || 1273 if (symbol_conf.use_callchain) {
1425 symbol_conf.inline_name) {
1426 ui_browser__printf(&browser->b, "%c ", folded_sign); 1274 ui_browser__printf(&browser->b, "%c ", folded_sign);
1427 width -= 2; 1275 width -= 2;
1428 } 1276 }
@@ -1464,15 +1312,11 @@ static int hist_browser__show_entry(struct hist_browser *browser,
1464 .is_current_entry = current_entry, 1312 .is_current_entry = current_entry,
1465 }; 1313 };
1466 1314
1467 if (entry->inline_node) 1315 printed += hist_browser__show_callchain(browser,
1468 printed += hist_browser__show_inline(browser, 1316 entry, 1, row,
1469 entry->inline_node, row, 0); 1317 hist_browser__show_callchain_entry,
1470 else 1318 &arg,
1471 printed += hist_browser__show_callchain(browser, 1319 hist_browser__check_output_full);
1472 entry, 1, row,
1473 hist_browser__show_callchain_entry,
1474 &arg,
1475 hist_browser__check_output_full);
1476 } 1320 }
1477 1321
1478 return printed; 1322 return printed;
diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c
index b5a5df14d702..bbfbc91a0fa4 100644
--- a/tools/perf/ui/progress.c
+++ b/tools/perf/ui/progress.c
@@ -28,13 +28,17 @@ void ui_progress__update(struct ui_progress *p, u64 adv)
28 } 28 }
29} 29}
30 30
31void ui_progress__init(struct ui_progress *p, u64 total, const char *title) 31void __ui_progress__init(struct ui_progress *p, u64 total,
32 const char *title, bool size)
32{ 33{
33 p->curr = 0; 34 p->curr = 0;
34 p->next = p->step = total / 16 ?: 1; 35 p->next = p->step = total / 16 ?: 1;
35 p->total = total; 36 p->total = total;
36 p->title = title; 37 p->title = title;
38 p->size = size;
37 39
40 if (ui_progress__ops->init)
41 ui_progress__ops->init(p);
38} 42}
39 43
40void ui_progress__finish(void) 44void ui_progress__finish(void)
diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h
index 594bbe6935dd..4f52c37b2f09 100644
--- a/tools/perf/ui/progress.h
+++ b/tools/perf/ui/progress.h
@@ -9,12 +9,22 @@ void ui_progress__finish(void);
9struct ui_progress { 9struct ui_progress {
10 const char *title; 10 const char *title;
11 u64 curr, next, step, total; 11 u64 curr, next, step, total;
12 bool size;
12}; 13};
13 14
14void ui_progress__init(struct ui_progress *p, u64 total, const char *title); 15void __ui_progress__init(struct ui_progress *p, u64 total,
16 const char *title, bool size);
17
18#define ui_progress__init(p, total, title) \
19 __ui_progress__init(p, total, title, false)
20
21#define ui_progress__init_size(p, total, title) \
22 __ui_progress__init(p, total, title, true)
23
15void ui_progress__update(struct ui_progress *p, u64 adv); 24void ui_progress__update(struct ui_progress *p, u64 adv);
16 25
17struct ui_progress_ops { 26struct ui_progress_ops {
27 void (*init)(struct ui_progress *p);
18 void (*update)(struct ui_progress *p); 28 void (*update)(struct ui_progress *p);
19 void (*finish)(void); 29 void (*finish)(void);
20}; 30};
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index de2810ae16be..25dd1e0ecc58 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -22,64 +22,6 @@ static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin)
22 return ret; 22 return ret;
23} 23}
24 24
25static size_t inline__fprintf(struct map *map, u64 ip, int left_margin,
26 int depth, int depth_mask, FILE *fp)
27{
28 struct dso *dso;
29 struct inline_node *node;
30 struct inline_list *ilist;
31 int ret = 0, i;
32
33 if (map == NULL)
34 return 0;
35
36 dso = map->dso;
37 if (dso == NULL)
38 return 0;
39
40 node = dso__parse_addr_inlines(dso,
41 map__rip_2objdump(map, ip));
42 if (node == NULL)
43 return 0;
44
45 list_for_each_entry(ilist, &node->val, list) {
46 if ((ilist->filename != NULL) || (ilist->funcname != NULL)) {
47 ret += callchain__fprintf_left_margin(fp, left_margin);
48
49 for (i = 0; i < depth; i++) {
50 if (depth_mask & (1 << i))
51 ret += fprintf(fp, "|");
52 else
53 ret += fprintf(fp, " ");
54 ret += fprintf(fp, " ");
55 }
56
57 if (callchain_param.key == CCKEY_ADDRESS ||
58 callchain_param.key == CCKEY_SRCLINE) {
59 if (ilist->filename != NULL)
60 ret += fprintf(fp, "%s:%d (inline)",
61 ilist->filename,
62 ilist->line_nr);
63 else
64 ret += fprintf(fp, "??");
65 } else if (ilist->funcname != NULL)
66 ret += fprintf(fp, "%s (inline)",
67 ilist->funcname);
68 else if (ilist->filename != NULL)
69 ret += fprintf(fp, "%s:%d (inline)",
70 ilist->filename,
71 ilist->line_nr);
72 else
73 ret += fprintf(fp, "??");
74
75 ret += fprintf(fp, "\n");
76 }
77 }
78
79 inline_node__delete(node);
80 return ret;
81}
82
83static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, 25static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
84 int left_margin) 26 int left_margin)
85{ 27{
@@ -138,9 +80,6 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
138 fputc('\n', fp); 80 fputc('\n', fp);
139 free(alloc_str); 81 free(alloc_str);
140 82
141 if (symbol_conf.inline_name)
142 ret += inline__fprintf(chain->ms.map, chain->ip,
143 left_margin, depth, depth_mask, fp);
144 return ret; 83 return ret;
145} 84}
146 85
@@ -315,13 +254,6 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
315 254
316 if (++entries_printed == callchain_param.print_limit) 255 if (++entries_printed == callchain_param.print_limit)
317 break; 256 break;
318
319 if (symbol_conf.inline_name)
320 ret += inline__fprintf(chain->ms.map,
321 chain->ip,
322 left_margin,
323 0, 0,
324 fp);
325 } 257 }
326 root = &cnode->rb_root; 258 root = &cnode->rb_root;
327 } 259 }
@@ -601,7 +533,6 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
601{ 533{
602 int ret; 534 int ret;
603 int callchain_ret = 0; 535 int callchain_ret = 0;
604 int inline_ret = 0;
605 struct perf_hpp hpp = { 536 struct perf_hpp hpp = {
606 .buf = bf, 537 .buf = bf,
607 .size = size, 538 .size = size,
@@ -623,13 +554,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
623 callchain_ret = hist_entry_callchain__fprintf(he, total_period, 554 callchain_ret = hist_entry_callchain__fprintf(he, total_period,
624 0, fp); 555 0, fp);
625 556
626 if (callchain_ret == 0 && symbol_conf.inline_name) { 557 ret += callchain_ret;
627 inline_ret = inline__fprintf(he->ms.map, he->ip, 0, 0, 0, fp);
628 ret += inline_ret;
629 if (inline_ret > 0)
630 ret += fprintf(fp, "\n");
631 } else
632 ret += callchain_ret;
633 558
634 return ret; 559 return ret;
635} 560}
diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c
index 236bcb620ae4..bc134b82829d 100644
--- a/tools/perf/ui/tui/progress.c
+++ b/tools/perf/ui/tui/progress.c
@@ -1,13 +1,34 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/kernel.h>
2#include "../cache.h" 3#include "../cache.h"
3#include "../progress.h" 4#include "../progress.h"
4#include "../libslang.h" 5#include "../libslang.h"
5#include "../ui.h" 6#include "../ui.h"
6#include "tui.h" 7#include "tui.h"
8#include "units.h"
7#include "../browser.h" 9#include "../browser.h"
8 10
11static void __tui_progress__init(struct ui_progress *p)
12{
13 p->next = p->step = p->total / (SLtt_Screen_Cols - 2) ?: 1;
14}
15
16static int get_title(struct ui_progress *p, char *buf, size_t size)
17{
18 char buf_cur[20];
19 char buf_tot[20];
20 int ret;
21
22 ret = unit_number__scnprintf(buf_cur, sizeof(buf_cur), p->curr);
23 ret += unit_number__scnprintf(buf_tot, sizeof(buf_tot), p->total);
24
25 return ret + scnprintf(buf, size, "%s [%s/%s]",
26 p->title, buf_cur, buf_tot);
27}
28
9static void tui_progress__update(struct ui_progress *p) 29static void tui_progress__update(struct ui_progress *p)
10{ 30{
31 char buf[100], *title = (char *) p->title;
11 int bar, y; 32 int bar, y;
12 /* 33 /*
13 * FIXME: We should have a per UI backend way of showing progress, 34 * FIXME: We should have a per UI backend way of showing progress,
@@ -19,13 +40,18 @@ static void tui_progress__update(struct ui_progress *p)
19 if (p->total == 0) 40 if (p->total == 0)
20 return; 41 return;
21 42
43 if (p->size) {
44 get_title(p, buf, sizeof(buf));
45 title = buf;
46 }
47
22 ui__refresh_dimensions(false); 48 ui__refresh_dimensions(false);
23 pthread_mutex_lock(&ui__lock); 49 pthread_mutex_lock(&ui__lock);
24 y = SLtt_Screen_Rows / 2 - 2; 50 y = SLtt_Screen_Rows / 2 - 2;
25 SLsmg_set_color(0); 51 SLsmg_set_color(0);
26 SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols); 52 SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols);
27 SLsmg_gotorc(y++, 1); 53 SLsmg_gotorc(y++, 1);
28 SLsmg_write_string((char *)p->title); 54 SLsmg_write_string(title);
29 SLsmg_fill_region(y, 1, 1, SLtt_Screen_Cols - 2, ' '); 55 SLsmg_fill_region(y, 1, 1, SLtt_Screen_Cols - 2, ' ');
30 SLsmg_set_color(HE_COLORSET_SELECTED); 56 SLsmg_set_color(HE_COLORSET_SELECTED);
31 bar = ((SLtt_Screen_Cols - 2) * p->curr) / p->total; 57 bar = ((SLtt_Screen_Cols - 2) * p->curr) / p->total;
@@ -50,8 +76,8 @@ static void tui_progress__finish(void)
50 pthread_mutex_unlock(&ui__lock); 76 pthread_mutex_unlock(&ui__lock);
51} 77}
52 78
53static struct ui_progress_ops tui_progress__ops = 79static struct ui_progress_ops tui_progress__ops = {
54{ 80 .init = __tui_progress__init,
55 .update = tui_progress__update, 81 .update = tui_progress__update,
56 .finish = tui_progress__finish, 82 .finish = tui_progress__finish,
57}; 83};
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 94518c1bf8b6..a3de7916fe63 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -13,6 +13,7 @@ libperf-y += find_bit.o
13libperf-y += kallsyms.o 13libperf-y += kallsyms.o
14libperf-y += levenshtein.o 14libperf-y += levenshtein.o
15libperf-y += llvm-utils.o 15libperf-y += llvm-utils.o
16libperf-y += mmap.o
16libperf-y += memswap.o 17libperf-y += memswap.o
17libperf-y += parse-events.o 18libperf-y += parse-events.o
18libperf-y += perf_regs.o 19libperf-y += perf_regs.o
@@ -34,6 +35,7 @@ libperf-y += dso.o
34libperf-y += symbol.o 35libperf-y += symbol.o
35libperf-y += symbol_fprintf.o 36libperf-y += symbol_fprintf.o
36libperf-y += color.o 37libperf-y += color.o
38libperf-y += metricgroup.o
37libperf-y += header.o 39libperf-y += header.o
38libperf-y += callchain.o 40libperf-y += callchain.o
39libperf-y += values.o 41libperf-y += values.o
@@ -78,6 +80,7 @@ libperf-y += data.o
78libperf-y += tsc.o 80libperf-y += tsc.o
79libperf-y += cloexec.o 81libperf-y += cloexec.o
80libperf-y += call-path.o 82libperf-y += call-path.o
83libperf-y += rwsem.o
81libperf-y += thread-stack.o 84libperf-y += thread-stack.o
82libperf-$(CONFIG_AUXTRACE) += auxtrace.o 85libperf-$(CONFIG_AUXTRACE) += auxtrace.o
83libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ 86libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index aa66791b1bfc..da1c4c4a0dd8 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -49,10 +49,9 @@ struct arch {
49 void *priv; 49 void *priv;
50 unsigned int model; 50 unsigned int model;
51 unsigned int family; 51 unsigned int family;
52 int (*init)(struct arch *arch); 52 int (*init)(struct arch *arch, char *cpuid);
53 bool (*ins_is_fused)(struct arch *arch, const char *ins1, 53 bool (*ins_is_fused)(struct arch *arch, const char *ins1,
54 const char *ins2); 54 const char *ins2);
55 int (*cpuid_parse)(struct arch *arch, char *cpuid);
56 struct { 55 struct {
57 char comment_char; 56 char comment_char;
58 char skip_functions_char; 57 char skip_functions_char;
@@ -132,10 +131,10 @@ static struct arch architectures[] = {
132 }, 131 },
133 { 132 {
134 .name = "x86", 133 .name = "x86",
134 .init = x86__annotate_init,
135 .instructions = x86__instructions, 135 .instructions = x86__instructions,
136 .nr_instructions = ARRAY_SIZE(x86__instructions), 136 .nr_instructions = ARRAY_SIZE(x86__instructions),
137 .ins_is_fused = x86__ins_is_fused, 137 .ins_is_fused = x86__ins_is_fused,
138 .cpuid_parse = x86__cpuid_parse,
139 .objdump = { 138 .objdump = {
140 .comment_char = '#', 139 .comment_char = '#',
141 }, 140 },
@@ -1457,16 +1456,13 @@ int symbol__disassemble(struct symbol *sym, struct map *map,
1457 *parch = arch; 1456 *parch = arch;
1458 1457
1459 if (arch->init) { 1458 if (arch->init) {
1460 err = arch->init(arch); 1459 err = arch->init(arch, cpuid);
1461 if (err) { 1460 if (err) {
1462 pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); 1461 pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name);
1463 return err; 1462 return err;
1464 } 1463 }
1465 } 1464 }
1466 1465
1467 if (arch->cpuid_parse && cpuid)
1468 arch->cpuid_parse(arch, cpuid);
1469
1470 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, 1466 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
1471 symfs_filename, sym->name, map->unmap_ip(map, sym->start), 1467 symfs_filename, sym->name, map->unmap_ip(map, sym->start),
1472 map->unmap_ip(map, sym->end)); 1468 map->unmap_ip(map, sym->end));
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 5547457566a7..a33491416400 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -208,7 +208,7 @@ static int auxtrace_queues__grow(struct auxtrace_queues *queues,
208 208
209static void *auxtrace_copy_data(u64 size, struct perf_session *session) 209static void *auxtrace_copy_data(u64 size, struct perf_session *session)
210{ 210{
211 int fd = perf_data_file__fd(session->file); 211 int fd = perf_data__fd(session->data);
212 void *p; 212 void *p;
213 ssize_t ret; 213 ssize_t ret;
214 214
@@ -305,7 +305,7 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
305 if (session->one_mmap) { 305 if (session->one_mmap) {
306 buffer->data = buffer->data_offset - session->one_mmap_offset + 306 buffer->data = buffer->data_offset - session->one_mmap_offset +
307 session->one_mmap_addr; 307 session->one_mmap_addr;
308 } else if (perf_data_file__is_pipe(session->file)) { 308 } else if (perf_data__is_pipe(session->data)) {
309 buffer->data = auxtrace_copy_data(buffer->size, session); 309 buffer->data = auxtrace_copy_data(buffer->size, session);
310 if (!buffer->data) 310 if (!buffer->data)
311 return -ENOMEM; 311 return -ENOMEM;
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 6031933d811c..082505d08d72 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -567,6 +567,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
567 call->ip = cursor_node->ip; 567 call->ip = cursor_node->ip;
568 call->ms.sym = cursor_node->sym; 568 call->ms.sym = cursor_node->sym;
569 call->ms.map = map__get(cursor_node->map); 569 call->ms.map = map__get(cursor_node->map);
570 call->srcline = cursor_node->srcline;
570 571
571 if (cursor_node->branch) { 572 if (cursor_node->branch) {
572 call->branch_count = 1; 573 call->branch_count = 1;
@@ -645,103 +646,120 @@ enum match_result {
645 MATCH_GT, 646 MATCH_GT,
646}; 647};
647 648
648static enum match_result match_chain_srcline(struct callchain_cursor_node *node, 649static enum match_result match_chain_strings(const char *left,
649 struct callchain_list *cnode) 650 const char *right)
650{ 651{
651 char *left = NULL;
652 char *right = NULL;
653 enum match_result ret = MATCH_EQ; 652 enum match_result ret = MATCH_EQ;
654 int cmp; 653 int cmp;
655 654
656 if (cnode->ms.map)
657 left = get_srcline(cnode->ms.map->dso,
658 map__rip_2objdump(cnode->ms.map, cnode->ip),
659 cnode->ms.sym, true, false);
660 if (node->map)
661 right = get_srcline(node->map->dso,
662 map__rip_2objdump(node->map, node->ip),
663 node->sym, true, false);
664
665 if (left && right) 655 if (left && right)
666 cmp = strcmp(left, right); 656 cmp = strcmp(left, right);
667 else if (!left && right) 657 else if (!left && right)
668 cmp = 1; 658 cmp = 1;
669 else if (left && !right) 659 else if (left && !right)
670 cmp = -1; 660 cmp = -1;
671 else if (cnode->ip == node->ip)
672 cmp = 0;
673 else 661 else
674 cmp = (cnode->ip < node->ip) ? -1 : 1; 662 return MATCH_ERROR;
675 663
676 if (cmp != 0) 664 if (cmp != 0)
677 ret = cmp < 0 ? MATCH_LT : MATCH_GT; 665 ret = cmp < 0 ? MATCH_LT : MATCH_GT;
678 666
679 free_srcline(left);
680 free_srcline(right);
681 return ret; 667 return ret;
682} 668}
683 669
670/*
671 * We need to always use relative addresses because we're aggregating
672 * callchains from multiple threads, i.e. different address spaces, so
673 * comparing absolute addresses make no sense as a symbol in a DSO may end up
674 * in a different address when used in a different binary or even the same
675 * binary but with some sort of address randomization technique, thus we need
676 * to compare just relative addresses. -acme
677 */
678static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip,
679 struct map *right_map, u64 right_ip)
680{
681 struct dso *left_dso = left_map ? left_map->dso : NULL;
682 struct dso *right_dso = right_map ? right_map->dso : NULL;
683
684 if (left_dso != right_dso)
685 return left_dso < right_dso ? MATCH_LT : MATCH_GT;
686
687 if (left_ip != right_ip)
688 return left_ip < right_ip ? MATCH_LT : MATCH_GT;
689
690 return MATCH_EQ;
691}
692
684static enum match_result match_chain(struct callchain_cursor_node *node, 693static enum match_result match_chain(struct callchain_cursor_node *node,
685 struct callchain_list *cnode) 694 struct callchain_list *cnode)
686{ 695{
687 struct symbol *sym = node->sym; 696 enum match_result match = MATCH_ERROR;
688 u64 left, right;
689 struct dso *left_dso = NULL;
690 struct dso *right_dso = NULL;
691
692 if (callchain_param.key == CCKEY_SRCLINE) {
693 enum match_result match = match_chain_srcline(node, cnode);
694 697
698 switch (callchain_param.key) {
699 case CCKEY_SRCLINE:
700 match = match_chain_strings(cnode->srcline, node->srcline);
695 if (match != MATCH_ERROR) 701 if (match != MATCH_ERROR)
696 return match; 702 break;
697 } 703 /* otherwise fall-back to symbol-based comparison below */
698 704 __fallthrough;
699 if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) { 705 case CCKEY_FUNCTION:
700 left = cnode->ms.sym->start; 706 if (node->sym && cnode->ms.sym) {
701 right = sym->start; 707 /*
702 left_dso = cnode->ms.map->dso; 708 * Compare inlined frames based on their symbol name
703 right_dso = node->map->dso; 709 * because different inlined frames will have the same
704 } else { 710 * symbol start. Otherwise do a faster comparison based
705 left = cnode->ip; 711 * on the symbol start address.
706 right = node->ip; 712 */
713 if (cnode->ms.sym->inlined || node->sym->inlined) {
714 match = match_chain_strings(cnode->ms.sym->name,
715 node->sym->name);
716 if (match != MATCH_ERROR)
717 break;
718 } else {
719 match = match_chain_dso_addresses(cnode->ms.map, cnode->ms.sym->start,
720 node->map, node->sym->start);
721 break;
722 }
723 }
724 /* otherwise fall-back to IP-based comparison below */
725 __fallthrough;
726 case CCKEY_ADDRESS:
727 default:
728 match = match_chain_dso_addresses(cnode->ms.map, cnode->ip, node->map, node->ip);
729 break;
707 } 730 }
708 731
709 if (left == right && left_dso == right_dso) { 732 if (match == MATCH_EQ && node->branch) {
710 if (node->branch) { 733 cnode->branch_count++;
711 cnode->branch_count++;
712 734
713 if (node->branch_from) { 735 if (node->branch_from) {
714 /* 736 /*
715 * It's "to" of a branch 737 * It's "to" of a branch
716 */ 738 */
717 cnode->brtype_stat.branch_to = true; 739 cnode->brtype_stat.branch_to = true;
718 740
719 if (node->branch_flags.predicted) 741 if (node->branch_flags.predicted)
720 cnode->predicted_count++; 742 cnode->predicted_count++;
721 743
722 if (node->branch_flags.abort) 744 if (node->branch_flags.abort)
723 cnode->abort_count++; 745 cnode->abort_count++;
724 746
725 branch_type_count(&cnode->brtype_stat, 747 branch_type_count(&cnode->brtype_stat,
726 &node->branch_flags, 748 &node->branch_flags,
727 node->branch_from, 749 node->branch_from,
728 node->ip); 750 node->ip);
729 } else { 751 } else {
730 /* 752 /*
731 * It's "from" of a branch 753 * It's "from" of a branch
732 */ 754 */
733 cnode->brtype_stat.branch_to = false; 755 cnode->brtype_stat.branch_to = false;
734 cnode->cycles_count += 756 cnode->cycles_count += node->branch_flags.cycles;
735 node->branch_flags.cycles; 757 cnode->iter_count += node->nr_loop_iter;
736 cnode->iter_count += node->nr_loop_iter; 758 cnode->iter_cycles += node->iter_cycles;
737 cnode->iter_cycles += node->iter_cycles;
738 }
739 } 759 }
740
741 return MATCH_EQ;
742 } 760 }
743 761
744 return left > right ? MATCH_GT : MATCH_LT; 762 return match;
745} 763}
746 764
747/* 765/*
@@ -970,7 +988,7 @@ merge_chain_branch(struct callchain_cursor *cursor,
970 list_for_each_entry_safe(list, next_list, &src->val, list) { 988 list_for_each_entry_safe(list, next_list, &src->val, list) {
971 callchain_cursor_append(cursor, list->ip, 989 callchain_cursor_append(cursor, list->ip,
972 list->ms.map, list->ms.sym, 990 list->ms.map, list->ms.sym,
973 false, NULL, 0, 0, 0); 991 false, NULL, 0, 0, 0, list->srcline);
974 list_del(&list->list); 992 list_del(&list->list);
975 map__zput(list->ms.map); 993 map__zput(list->ms.map);
976 free(list); 994 free(list);
@@ -1010,7 +1028,8 @@ int callchain_merge(struct callchain_cursor *cursor,
1010int callchain_cursor_append(struct callchain_cursor *cursor, 1028int callchain_cursor_append(struct callchain_cursor *cursor,
1011 u64 ip, struct map *map, struct symbol *sym, 1029 u64 ip, struct map *map, struct symbol *sym,
1012 bool branch, struct branch_flags *flags, 1030 bool branch, struct branch_flags *flags,
1013 int nr_loop_iter, u64 iter_cycles, u64 branch_from) 1031 int nr_loop_iter, u64 iter_cycles, u64 branch_from,
1032 const char *srcline)
1014{ 1033{
1015 struct callchain_cursor_node *node = *cursor->last; 1034 struct callchain_cursor_node *node = *cursor->last;
1016 1035
@@ -1029,6 +1048,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
1029 node->branch = branch; 1048 node->branch = branch;
1030 node->nr_loop_iter = nr_loop_iter; 1049 node->nr_loop_iter = nr_loop_iter;
1031 node->iter_cycles = iter_cycles; 1050 node->iter_cycles = iter_cycles;
1051 node->srcline = srcline;
1032 1052
1033 if (flags) 1053 if (flags)
1034 memcpy(&node->branch_flags, flags, 1054 memcpy(&node->branch_flags, flags,
@@ -1071,10 +1091,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
1071{ 1091{
1072 al->map = node->map; 1092 al->map = node->map;
1073 al->sym = node->sym; 1093 al->sym = node->sym;
1074 if (node->map) 1094 al->srcline = node->srcline;
1075 al->addr = node->map->map_ip(node->map, node->ip); 1095 al->addr = node->ip;
1076 else
1077 al->addr = node->ip;
1078 1096
1079 if (al->sym == NULL) { 1097 if (al->sym == NULL) {
1080 if (hide_unresolved) 1098 if (hide_unresolved)
@@ -1116,16 +1134,15 @@ char *callchain_list__sym_name(struct callchain_list *cl,
1116 int printed; 1134 int printed;
1117 1135
1118 if (cl->ms.sym) { 1136 if (cl->ms.sym) {
1119 if (show_srcline && cl->ms.map && !cl->srcline) 1137 const char *inlined = cl->ms.sym->inlined ? " (inlined)" : "";
1120 cl->srcline = get_srcline(cl->ms.map->dso, 1138
1121 map__rip_2objdump(cl->ms.map, 1139 if (show_srcline && cl->srcline)
1122 cl->ip), 1140 printed = scnprintf(bf, bfsize, "%s %s%s",
1123 cl->ms.sym, false, show_addr); 1141 cl->ms.sym->name, cl->srcline,
1124 if (cl->srcline) 1142 inlined);
1125 printed = scnprintf(bf, bfsize, "%s %s",
1126 cl->ms.sym->name, cl->srcline);
1127 else 1143 else
1128 printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); 1144 printed = scnprintf(bf, bfsize, "%s%s",
1145 cl->ms.sym->name, inlined);
1129 } else 1146 } else
1130 printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); 1147 printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
1131 1148
@@ -1533,7 +1550,7 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
1533 node->branch, &node->branch_flags, 1550 node->branch, &node->branch_flags,
1534 node->nr_loop_iter, 1551 node->nr_loop_iter,
1535 node->iter_cycles, 1552 node->iter_cycles,
1536 node->branch_from); 1553 node->branch_from, node->srcline);
1537 if (rc) 1554 if (rc)
1538 break; 1555 break;
1539 1556
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index f967aa47d0a1..b79ef2478a57 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -122,7 +122,7 @@ struct callchain_list {
122 u64 iter_count; 122 u64 iter_count;
123 u64 iter_cycles; 123 u64 iter_cycles;
124 struct branch_type_stat brtype_stat; 124 struct branch_type_stat brtype_stat;
125 char *srcline; 125 const char *srcline;
126 struct list_head list; 126 struct list_head list;
127}; 127};
128 128
@@ -136,6 +136,7 @@ struct callchain_cursor_node {
136 u64 ip; 136 u64 ip;
137 struct map *map; 137 struct map *map;
138 struct symbol *sym; 138 struct symbol *sym;
139 const char *srcline;
139 bool branch; 140 bool branch;
140 struct branch_flags branch_flags; 141 struct branch_flags branch_flags;
141 u64 branch_from; 142 u64 branch_from;
@@ -202,7 +203,8 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
202int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, 203int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
203 struct map *map, struct symbol *sym, 204 struct map *map, struct symbol *sym,
204 bool branch, struct branch_flags *flags, 205 bool branch, struct branch_flags *flags,
205 int nr_loop_iter, u64 iter_cycles, u64 branch_from); 206 int nr_loop_iter, u64 iter_cycles, u64 branch_from,
207 const char *srcline);
206 208
207/* Close a cursor writing session. Initialize for the reader */ 209/* Close a cursor writing session. Initialize for the reader */
208static inline void callchain_cursor_commit(struct callchain_cursor *cursor) 210static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 8808570f8e9c..7798a2cc8a86 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -6,6 +6,7 @@
6#include <stdio.h> 6#include <stdio.h>
7#include <string.h> 7#include <string.h>
8#include <linux/refcount.h> 8#include <linux/refcount.h>
9#include "rwsem.h"
9 10
10struct comm_str { 11struct comm_str {
11 char *str; 12 char *str;
@@ -15,6 +16,7 @@ struct comm_str {
15 16
16/* Should perhaps be moved to struct machine */ 17/* Should perhaps be moved to struct machine */
17static struct rb_root comm_str_root; 18static struct rb_root comm_str_root;
19static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,};
18 20
19static struct comm_str *comm_str__get(struct comm_str *cs) 21static struct comm_str *comm_str__get(struct comm_str *cs)
20{ 22{
@@ -26,7 +28,9 @@ static struct comm_str *comm_str__get(struct comm_str *cs)
26static void comm_str__put(struct comm_str *cs) 28static void comm_str__put(struct comm_str *cs)
27{ 29{
28 if (cs && refcount_dec_and_test(&cs->refcnt)) { 30 if (cs && refcount_dec_and_test(&cs->refcnt)) {
31 down_write(&comm_str_lock);
29 rb_erase(&cs->rb_node, &comm_str_root); 32 rb_erase(&cs->rb_node, &comm_str_root);
33 up_write(&comm_str_lock);
30 zfree(&cs->str); 34 zfree(&cs->str);
31 free(cs); 35 free(cs);
32 } 36 }
@@ -51,7 +55,8 @@ static struct comm_str *comm_str__alloc(const char *str)
51 return cs; 55 return cs;
52} 56}
53 57
54static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) 58static
59struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root)
55{ 60{
56 struct rb_node **p = &root->rb_node; 61 struct rb_node **p = &root->rb_node;
57 struct rb_node *parent = NULL; 62 struct rb_node *parent = NULL;
@@ -82,6 +87,17 @@ static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
82 return new; 87 return new;
83} 88}
84 89
90static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
91{
92 struct comm_str *cs;
93
94 down_write(&comm_str_lock);
95 cs = __comm_str__findnew(str, root);
96 up_write(&comm_str_lock);
97
98 return cs;
99}
100
85struct comm *comm__new(const char *str, u64 timestamp, bool exec) 101struct comm *comm__new(const char *str, u64 timestamp, bool exec)
86{ 102{
87 struct comm *comm = zalloc(sizeof(*comm)); 103 struct comm *comm = zalloc(sizeof(*comm));
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 4b893c622236..84eb9393c7db 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -701,10 +701,7 @@ struct perf_config_set *perf_config_set__new(void)
701 701
702 if (set) { 702 if (set) {
703 INIT_LIST_HEAD(&set->sections); 703 INIT_LIST_HEAD(&set->sections);
704 if (perf_config_set__init(set) < 0) { 704 perf_config_set__init(set);
705 perf_config_set__delete(set);
706 set = NULL;
707 }
708 } 705 }
709 706
710 return set; 707 return set;
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 2346cecb8ea2..5744c12641a5 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1577,10 +1577,10 @@ int bt_convert__perf2ctf(const char *input, const char *path,
1577 struct perf_data_convert_opts *opts) 1577 struct perf_data_convert_opts *opts)
1578{ 1578{
1579 struct perf_session *session; 1579 struct perf_session *session;
1580 struct perf_data_file file = { 1580 struct perf_data data = {
1581 .path = input, 1581 .file.path = input,
1582 .mode = PERF_DATA_MODE_READ, 1582 .mode = PERF_DATA_MODE_READ,
1583 .force = opts->force, 1583 .force = opts->force,
1584 }; 1584 };
1585 struct convert c = { 1585 struct convert c = {
1586 .tool = { 1586 .tool = {
@@ -1619,7 +1619,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
1619 1619
1620 err = -1; 1620 err = -1;
1621 /* perf.data session */ 1621 /* perf.data session */
1622 session = perf_session__new(&file, 0, &c.tool); 1622 session = perf_session__new(&data, 0, &c.tool);
1623 if (!session) 1623 if (!session)
1624 goto free_writer; 1624 goto free_writer;
1625 1625
@@ -1650,7 +1650,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
1650 1650
1651 fprintf(stderr, 1651 fprintf(stderr,
1652 "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", 1652 "[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
1653 file.path, path); 1653 data.file.path, path);
1654 1654
1655 fprintf(stderr, 1655 fprintf(stderr,
1656 "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples", 1656 "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples",
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 79192758bdb3..48094fde0a68 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -4,6 +4,7 @@
4#include <sys/types.h> 4#include <sys/types.h>
5#include <sys/stat.h> 5#include <sys/stat.h>
6#include <errno.h> 6#include <errno.h>
7#include <fcntl.h>
7#include <unistd.h> 8#include <unistd.h>
8#include <string.h> 9#include <string.h>
9 10
@@ -21,56 +22,56 @@
21#endif 22#endif
22#endif 23#endif
23 24
24static bool check_pipe(struct perf_data_file *file) 25static bool check_pipe(struct perf_data *data)
25{ 26{
26 struct stat st; 27 struct stat st;
27 bool is_pipe = false; 28 bool is_pipe = false;
28 int fd = perf_data_file__is_read(file) ? 29 int fd = perf_data__is_read(data) ?
29 STDIN_FILENO : STDOUT_FILENO; 30 STDIN_FILENO : STDOUT_FILENO;
30 31
31 if (!file->path) { 32 if (!data->file.path) {
32 if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) 33 if (!fstat(fd, &st) && S_ISFIFO(st.st_mode))
33 is_pipe = true; 34 is_pipe = true;
34 } else { 35 } else {
35 if (!strcmp(file->path, "-")) 36 if (!strcmp(data->file.path, "-"))
36 is_pipe = true; 37 is_pipe = true;
37 } 38 }
38 39
39 if (is_pipe) 40 if (is_pipe)
40 file->fd = fd; 41 data->file.fd = fd;
41 42
42 return file->is_pipe = is_pipe; 43 return data->is_pipe = is_pipe;
43} 44}
44 45
45static int check_backup(struct perf_data_file *file) 46static int check_backup(struct perf_data *data)
46{ 47{
47 struct stat st; 48 struct stat st;
48 49
49 if (!stat(file->path, &st) && st.st_size) { 50 if (!stat(data->file.path, &st) && st.st_size) {
50 /* TODO check errors properly */ 51 /* TODO check errors properly */
51 char oldname[PATH_MAX]; 52 char oldname[PATH_MAX];
52 snprintf(oldname, sizeof(oldname), "%s.old", 53 snprintf(oldname, sizeof(oldname), "%s.old",
53 file->path); 54 data->file.path);
54 unlink(oldname); 55 unlink(oldname);
55 rename(file->path, oldname); 56 rename(data->file.path, oldname);
56 } 57 }
57 58
58 return 0; 59 return 0;
59} 60}
60 61
61static int open_file_read(struct perf_data_file *file) 62static int open_file_read(struct perf_data *data)
62{ 63{
63 struct stat st; 64 struct stat st;
64 int fd; 65 int fd;
65 char sbuf[STRERR_BUFSIZE]; 66 char sbuf[STRERR_BUFSIZE];
66 67
67 fd = open(file->path, O_RDONLY); 68 fd = open(data->file.path, O_RDONLY);
68 if (fd < 0) { 69 if (fd < 0) {
69 int err = errno; 70 int err = errno;
70 71
71 pr_err("failed to open %s: %s", file->path, 72 pr_err("failed to open %s: %s", data->file.path,
72 str_error_r(err, sbuf, sizeof(sbuf))); 73 str_error_r(err, sbuf, sizeof(sbuf)));
73 if (err == ENOENT && !strcmp(file->path, "perf.data")) 74 if (err == ENOENT && !strcmp(data->file.path, "perf.data"))
74 pr_err(" (try 'perf record' first)"); 75 pr_err(" (try 'perf record' first)");
75 pr_err("\n"); 76 pr_err("\n");
76 return -err; 77 return -err;
@@ -79,19 +80,19 @@ static int open_file_read(struct perf_data_file *file)
79 if (fstat(fd, &st) < 0) 80 if (fstat(fd, &st) < 0)
80 goto out_close; 81 goto out_close;
81 82
82 if (!file->force && st.st_uid && (st.st_uid != geteuid())) { 83 if (!data->force && st.st_uid && (st.st_uid != geteuid())) {
83 pr_err("File %s not owned by current user or root (use -f to override)\n", 84 pr_err("File %s not owned by current user or root (use -f to override)\n",
84 file->path); 85 data->file.path);
85 goto out_close; 86 goto out_close;
86 } 87 }
87 88
88 if (!st.st_size) { 89 if (!st.st_size) {
89 pr_info("zero-sized file (%s), nothing to do!\n", 90 pr_info("zero-sized data (%s), nothing to do!\n",
90 file->path); 91 data->file.path);
91 goto out_close; 92 goto out_close;
92 } 93 }
93 94
94 file->size = st.st_size; 95 data->size = st.st_size;
95 return fd; 96 return fd;
96 97
97 out_close: 98 out_close:
@@ -99,49 +100,49 @@ static int open_file_read(struct perf_data_file *file)
99 return -1; 100 return -1;
100} 101}
101 102
102static int open_file_write(struct perf_data_file *file) 103static int open_file_write(struct perf_data *data)
103{ 104{
104 int fd; 105 int fd;
105 char sbuf[STRERR_BUFSIZE]; 106 char sbuf[STRERR_BUFSIZE];
106 107
107 if (check_backup(file)) 108 if (check_backup(data))
108 return -1; 109 return -1;
109 110
110 fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, 111 fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC,
111 S_IRUSR|S_IWUSR); 112 S_IRUSR|S_IWUSR);
112 113
113 if (fd < 0) 114 if (fd < 0)
114 pr_err("failed to open %s : %s\n", file->path, 115 pr_err("failed to open %s : %s\n", data->file.path,
115 str_error_r(errno, sbuf, sizeof(sbuf))); 116 str_error_r(errno, sbuf, sizeof(sbuf)));
116 117
117 return fd; 118 return fd;
118} 119}
119 120
120static int open_file(struct perf_data_file *file) 121static int open_file(struct perf_data *data)
121{ 122{
122 int fd; 123 int fd;
123 124
124 fd = perf_data_file__is_read(file) ? 125 fd = perf_data__is_read(data) ?
125 open_file_read(file) : open_file_write(file); 126 open_file_read(data) : open_file_write(data);
126 127
127 file->fd = fd; 128 data->file.fd = fd;
128 return fd < 0 ? -1 : 0; 129 return fd < 0 ? -1 : 0;
129} 130}
130 131
131int perf_data_file__open(struct perf_data_file *file) 132int perf_data__open(struct perf_data *data)
132{ 133{
133 if (check_pipe(file)) 134 if (check_pipe(data))
134 return 0; 135 return 0;
135 136
136 if (!file->path) 137 if (!data->file.path)
137 file->path = "perf.data"; 138 data->file.path = "perf.data";
138 139
139 return open_file(file); 140 return open_file(data);
140} 141}
141 142
142void perf_data_file__close(struct perf_data_file *file) 143void perf_data__close(struct perf_data *data)
143{ 144{
144 close(file->fd); 145 close(data->file.fd);
145} 146}
146 147
147ssize_t perf_data_file__write(struct perf_data_file *file, 148ssize_t perf_data_file__write(struct perf_data_file *file,
@@ -150,42 +151,48 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
150 return writen(file->fd, buf, size); 151 return writen(file->fd, buf, size);
151} 152}
152 153
153int perf_data_file__switch(struct perf_data_file *file, 154ssize_t perf_data__write(struct perf_data *data,
155 void *buf, size_t size)
156{
157 return perf_data_file__write(&data->file, buf, size);
158}
159
160int perf_data__switch(struct perf_data *data,
154 const char *postfix, 161 const char *postfix,
155 size_t pos, bool at_exit) 162 size_t pos, bool at_exit)
156{ 163{
157 char *new_filepath; 164 char *new_filepath;
158 int ret; 165 int ret;
159 166
160 if (check_pipe(file)) 167 if (check_pipe(data))
161 return -EINVAL; 168 return -EINVAL;
162 if (perf_data_file__is_read(file)) 169 if (perf_data__is_read(data))
163 return -EINVAL; 170 return -EINVAL;
164 171
165 if (asprintf(&new_filepath, "%s.%s", file->path, postfix) < 0) 172 if (asprintf(&new_filepath, "%s.%s", data->file.path, postfix) < 0)
166 return -ENOMEM; 173 return -ENOMEM;
167 174
168 /* 175 /*
169 * Only fire a warning, don't return error, continue fill 176 * Only fire a warning, don't return error, continue fill
170 * original file. 177 * original file.
171 */ 178 */
172 if (rename(file->path, new_filepath)) 179 if (rename(data->file.path, new_filepath))
173 pr_warning("Failed to rename %s to %s\n", file->path, new_filepath); 180 pr_warning("Failed to rename %s to %s\n", data->file.path, new_filepath);
174 181
175 if (!at_exit) { 182 if (!at_exit) {
176 close(file->fd); 183 close(data->file.fd);
177 ret = perf_data_file__open(file); 184 ret = perf_data__open(data);
178 if (ret < 0) 185 if (ret < 0)
179 goto out; 186 goto out;
180 187
181 if (lseek(file->fd, pos, SEEK_SET) == (off_t)-1) { 188 if (lseek(data->file.fd, pos, SEEK_SET) == (off_t)-1) {
182 ret = -errno; 189 ret = -errno;
183 pr_debug("Failed to lseek to %zu: %s", 190 pr_debug("Failed to lseek to %zu: %s",
184 pos, strerror(errno)); 191 pos, strerror(errno));
185 goto out; 192 goto out;
186 } 193 }
187 } 194 }
188 ret = file->fd; 195 ret = data->file.fd;
189out: 196out:
190 free(new_filepath); 197 free(new_filepath);
191 return ret; 198 return ret;
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 80241ba78101..4828f7feea89 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -10,51 +10,57 @@ enum perf_data_mode {
10}; 10};
11 11
12struct perf_data_file { 12struct perf_data_file {
13 const char *path; 13 const char *path;
14 int fd; 14 int fd;
15};
16
17struct perf_data {
18 struct perf_data_file file;
15 bool is_pipe; 19 bool is_pipe;
16 bool force; 20 bool force;
17 unsigned long size; 21 unsigned long size;
18 enum perf_data_mode mode; 22 enum perf_data_mode mode;
19}; 23};
20 24
21static inline bool perf_data_file__is_read(struct perf_data_file *file) 25static inline bool perf_data__is_read(struct perf_data *data)
22{ 26{
23 return file->mode == PERF_DATA_MODE_READ; 27 return data->mode == PERF_DATA_MODE_READ;
24} 28}
25 29
26static inline bool perf_data_file__is_write(struct perf_data_file *file) 30static inline bool perf_data__is_write(struct perf_data *data)
27{ 31{
28 return file->mode == PERF_DATA_MODE_WRITE; 32 return data->mode == PERF_DATA_MODE_WRITE;
29} 33}
30 34
31static inline int perf_data_file__is_pipe(struct perf_data_file *file) 35static inline int perf_data__is_pipe(struct perf_data *data)
32{ 36{
33 return file->is_pipe; 37 return data->is_pipe;
34} 38}
35 39
36static inline int perf_data_file__fd(struct perf_data_file *file) 40static inline int perf_data__fd(struct perf_data *data)
37{ 41{
38 return file->fd; 42 return data->file.fd;
39} 43}
40 44
41static inline unsigned long perf_data_file__size(struct perf_data_file *file) 45static inline unsigned long perf_data__size(struct perf_data *data)
42{ 46{
43 return file->size; 47 return data->size;
44} 48}
45 49
46int perf_data_file__open(struct perf_data_file *file); 50int perf_data__open(struct perf_data *data);
47void perf_data_file__close(struct perf_data_file *file); 51void perf_data__close(struct perf_data *data);
52ssize_t perf_data__write(struct perf_data *data,
53 void *buf, size_t size);
48ssize_t perf_data_file__write(struct perf_data_file *file, 54ssize_t perf_data_file__write(struct perf_data_file *file,
49 void *buf, size_t size); 55 void *buf, size_t size);
50/* 56/*
51 * If at_exit is set, only rename current perf.data to 57 * If at_exit is set, only rename current perf.data to
52 * perf.data.<postfix>, continue write on original file. 58 * perf.data.<postfix>, continue write on original data.
53 * Set at_exit when flushing the last output. 59 * Set at_exit when flushing the last output.
54 * 60 *
55 * Return value is fd of new output. 61 * Return value is fd of new output.
56 */ 62 */
57int perf_data_file__switch(struct perf_data_file *file, 63int perf_data__switch(struct perf_data *data,
58 const char *postfix, 64 const char *postfix,
59 size_t pos, bool at_exit); 65 size_t pos, bool at_exit);
60#endif /* __PERF_DATA_H */ 66#endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index dc8b53b6950e..f3a71db83947 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -112,50 +112,53 @@ int dump_printf(const char *fmt, ...)
112 return ret; 112 return ret;
113} 113}
114 114
115static void trace_event_printer(enum binary_printer_ops op, 115static int trace_event_printer(enum binary_printer_ops op,
116 unsigned int val, void *extra) 116 unsigned int val, void *extra, FILE *fp)
117{ 117{
118 const char *color = PERF_COLOR_BLUE; 118 const char *color = PERF_COLOR_BLUE;
119 union perf_event *event = (union perf_event *)extra; 119 union perf_event *event = (union perf_event *)extra;
120 unsigned char ch = (unsigned char)val; 120 unsigned char ch = (unsigned char)val;
121 int printed = 0;
121 122
122 switch (op) { 123 switch (op) {
123 case BINARY_PRINT_DATA_BEGIN: 124 case BINARY_PRINT_DATA_BEGIN:
124 printf("."); 125 printed += fprintf(fp, ".");
125 color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", 126 printed += color_fprintf(fp, color, "\n. ... raw event: size %d bytes\n",
126 event->header.size); 127 event->header.size);
127 break; 128 break;
128 case BINARY_PRINT_LINE_BEGIN: 129 case BINARY_PRINT_LINE_BEGIN:
129 printf("."); 130 printed += fprintf(fp, ".");
130 break; 131 break;
131 case BINARY_PRINT_ADDR: 132 case BINARY_PRINT_ADDR:
132 color_fprintf(stdout, color, " %04x: ", val); 133 printed += color_fprintf(fp, color, " %04x: ", val);
133 break; 134 break;
134 case BINARY_PRINT_NUM_DATA: 135 case BINARY_PRINT_NUM_DATA:
135 color_fprintf(stdout, color, " %02x", val); 136 printed += color_fprintf(fp, color, " %02x", val);
136 break; 137 break;
137 case BINARY_PRINT_NUM_PAD: 138 case BINARY_PRINT_NUM_PAD:
138 color_fprintf(stdout, color, " "); 139 printed += color_fprintf(fp, color, " ");
139 break; 140 break;
140 case BINARY_PRINT_SEP: 141 case BINARY_PRINT_SEP:
141 color_fprintf(stdout, color, " "); 142 printed += color_fprintf(fp, color, " ");
142 break; 143 break;
143 case BINARY_PRINT_CHAR_DATA: 144 case BINARY_PRINT_CHAR_DATA:
144 color_fprintf(stdout, color, "%c", 145 printed += color_fprintf(fp, color, "%c",
145 isprint(ch) ? ch : '.'); 146 isprint(ch) ? ch : '.');
146 break; 147 break;
147 case BINARY_PRINT_CHAR_PAD: 148 case BINARY_PRINT_CHAR_PAD:
148 color_fprintf(stdout, color, " "); 149 printed += color_fprintf(fp, color, " ");
149 break; 150 break;
150 case BINARY_PRINT_LINE_END: 151 case BINARY_PRINT_LINE_END:
151 color_fprintf(stdout, color, "\n"); 152 printed += color_fprintf(fp, color, "\n");
152 break; 153 break;
153 case BINARY_PRINT_DATA_END: 154 case BINARY_PRINT_DATA_END:
154 printf("\n"); 155 printed += fprintf(fp, "\n");
155 break; 156 break;
156 default: 157 default:
157 break; 158 break;
158 } 159 }
160
161 return printed;
159} 162}
160 163
161void trace_event(union perf_event *event) 164void trace_event(union perf_event *event)
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 00c98c968cb1..d5b6f7f5baff 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -7,9 +7,11 @@
7#include <sys/stat.h> 7#include <sys/stat.h>
8#include <unistd.h> 8#include <unistd.h>
9#include <errno.h> 9#include <errno.h>
10#include <fcntl.h>
10#include "compress.h" 11#include "compress.h"
11#include "path.h" 12#include "path.h"
12#include "symbol.h" 13#include "symbol.h"
14#include "srcline.h"
13#include "dso.h" 15#include "dso.h"
14#include "machine.h" 16#include "machine.h"
15#include "auxtrace.h" 17#include "auxtrace.h"
@@ -1201,6 +1203,8 @@ struct dso *dso__new(const char *name)
1201 for (i = 0; i < MAP__NR_TYPES; ++i) 1203 for (i = 0; i < MAP__NR_TYPES; ++i)
1202 dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; 1204 dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
1203 dso->data.cache = RB_ROOT; 1205 dso->data.cache = RB_ROOT;
1206 dso->inlined_nodes = RB_ROOT;
1207 dso->srclines = RB_ROOT;
1204 dso->data.fd = -1; 1208 dso->data.fd = -1;
1205 dso->data.status = DSO_DATA_STATUS_UNKNOWN; 1209 dso->data.status = DSO_DATA_STATUS_UNKNOWN;
1206 dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; 1210 dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
@@ -1232,6 +1236,10 @@ void dso__delete(struct dso *dso)
1232 if (!RB_EMPTY_NODE(&dso->rb_node)) 1236 if (!RB_EMPTY_NODE(&dso->rb_node))
1233 pr_err("DSO %s is still in rbtree when being deleted!\n", 1237 pr_err("DSO %s is still in rbtree when being deleted!\n",
1234 dso->long_name); 1238 dso->long_name);
1239
1240 /* free inlines first, as they reference symbols */
1241 inlines__tree_delete(&dso->inlined_nodes);
1242 srcline__tree_delete(&dso->srclines);
1235 for (i = 0; i < MAP__NR_TYPES; ++i) 1243 for (i = 0; i < MAP__NR_TYPES; ++i)
1236 symbols__delete(&dso->symbols[i]); 1244 symbols__delete(&dso->symbols[i]);
1237 1245
@@ -1366,9 +1374,9 @@ void __dsos__add(struct dsos *dsos, struct dso *dso)
1366 1374
1367void dsos__add(struct dsos *dsos, struct dso *dso) 1375void dsos__add(struct dsos *dsos, struct dso *dso)
1368{ 1376{
1369 pthread_rwlock_wrlock(&dsos->lock); 1377 down_write(&dsos->lock);
1370 __dsos__add(dsos, dso); 1378 __dsos__add(dsos, dso);
1371 pthread_rwlock_unlock(&dsos->lock); 1379 up_write(&dsos->lock);
1372} 1380}
1373 1381
1374struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) 1382struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
@@ -1387,9 +1395,9 @@ struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
1387struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) 1395struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
1388{ 1396{
1389 struct dso *dso; 1397 struct dso *dso;
1390 pthread_rwlock_rdlock(&dsos->lock); 1398 down_read(&dsos->lock);
1391 dso = __dsos__find(dsos, name, cmp_short); 1399 dso = __dsos__find(dsos, name, cmp_short);
1392 pthread_rwlock_unlock(&dsos->lock); 1400 up_read(&dsos->lock);
1393 return dso; 1401 return dso;
1394} 1402}
1395 1403
@@ -1416,9 +1424,9 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
1416struct dso *dsos__findnew(struct dsos *dsos, const char *name) 1424struct dso *dsos__findnew(struct dsos *dsos, const char *name)
1417{ 1425{
1418 struct dso *dso; 1426 struct dso *dso;
1419 pthread_rwlock_wrlock(&dsos->lock); 1427 down_write(&dsos->lock);
1420 dso = dso__get(__dsos__findnew(dsos, name)); 1428 dso = dso__get(__dsos__findnew(dsos, name));
1421 pthread_rwlock_unlock(&dsos->lock); 1429 up_write(&dsos->lock);
1422 return dso; 1430 return dso;
1423} 1431}
1424 1432
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 926ff2e7f668..c229dbe0277a 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -7,7 +7,7 @@
7#include <linux/rbtree.h> 7#include <linux/rbtree.h>
8#include <sys/types.h> 8#include <sys/types.h>
9#include <stdbool.h> 9#include <stdbool.h>
10#include <pthread.h> 10#include "rwsem.h"
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/bitops.h> 12#include <linux/bitops.h>
13#include "map.h" 13#include "map.h"
@@ -130,7 +130,7 @@ struct dso_cache {
130struct dsos { 130struct dsos {
131 struct list_head head; 131 struct list_head head;
132 struct rb_root root; /* rbtree root sorted by long name */ 132 struct rb_root root; /* rbtree root sorted by long name */
133 pthread_rwlock_t lock; 133 struct rw_semaphore lock;
134}; 134};
135 135
136struct auxtrace_cache; 136struct auxtrace_cache;
@@ -142,6 +142,8 @@ struct dso {
142 struct rb_root *root; /* root of rbtree that rb_node is in */ 142 struct rb_root *root; /* root of rbtree that rb_node is in */
143 struct rb_root symbols[MAP__NR_TYPES]; 143 struct rb_root symbols[MAP__NR_TYPES];
144 struct rb_root symbol_names[MAP__NR_TYPES]; 144 struct rb_root symbol_names[MAP__NR_TYPES];
145 struct rb_root inlined_nodes;
146 struct rb_root srclines;
145 struct { 147 struct {
146 u64 addr; 148 u64 addr;
147 struct symbol *symbol; 149 struct symbol *symbol;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index fc690fecbfd6..97a8ef9980db 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1,6 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <dirent.h> 2#include <dirent.h>
3#include <errno.h> 3#include <errno.h>
4#include <fcntl.h>
4#include <inttypes.h> 5#include <inttypes.h>
5#include <linux/kernel.h> 6#include <linux/kernel.h>
6#include <linux/types.h> 7#include <linux/types.h>
@@ -678,21 +679,21 @@ out:
678 return err; 679 return err;
679} 680}
680 681
681int perf_event__synthesize_threads(struct perf_tool *tool, 682static int __perf_event__synthesize_threads(struct perf_tool *tool,
682 perf_event__handler_t process, 683 perf_event__handler_t process,
683 struct machine *machine, 684 struct machine *machine,
684 bool mmap_data, 685 bool mmap_data,
685 unsigned int proc_map_timeout) 686 unsigned int proc_map_timeout,
687 struct dirent **dirent,
688 int start,
689 int num)
686{ 690{
687 DIR *proc;
688 char proc_path[PATH_MAX];
689 struct dirent *dirent;
690 union perf_event *comm_event, *mmap_event, *fork_event; 691 union perf_event *comm_event, *mmap_event, *fork_event;
691 union perf_event *namespaces_event; 692 union perf_event *namespaces_event;
692 int err = -1; 693 int err = -1;
693 694 char *end;
694 if (machine__is_default_guest(machine)) 695 pid_t pid;
695 return 0; 696 int i;
696 697
697 comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); 698 comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
698 if (comm_event == NULL) 699 if (comm_event == NULL)
@@ -712,31 +713,25 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
712 if (namespaces_event == NULL) 713 if (namespaces_event == NULL)
713 goto out_free_fork; 714 goto out_free_fork;
714 715
715 snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); 716 for (i = start; i < start + num; i++) {
716 proc = opendir(proc_path); 717 if (!isdigit(dirent[i]->d_name[0]))
717 718 continue;
718 if (proc == NULL)
719 goto out_free_namespaces;
720
721 while ((dirent = readdir(proc)) != NULL) {
722 char *end;
723 pid_t pid = strtol(dirent->d_name, &end, 10);
724 719
725 if (*end) /* only interested in proper numerical dirents */ 720 pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
721 /* only interested in proper numerical dirents */
722 if (*end)
726 continue; 723 continue;
727 /* 724 /*
728 * We may race with exiting thread, so don't stop just because 725 * We may race with exiting thread, so don't stop just because
729 * one thread couldn't be synthesized. 726 * one thread couldn't be synthesized.
730 */ 727 */
731 __event__synthesize_thread(comm_event, mmap_event, fork_event, 728 __event__synthesize_thread(comm_event, mmap_event, fork_event,
732 namespaces_event, pid, 1, process, 729 namespaces_event, pid, 1, process,
733 tool, machine, mmap_data, 730 tool, machine, mmap_data,
734 proc_map_timeout); 731 proc_map_timeout);
735 } 732 }
736
737 err = 0; 733 err = 0;
738 closedir(proc); 734
739out_free_namespaces:
740 free(namespaces_event); 735 free(namespaces_event);
741out_free_fork: 736out_free_fork:
742 free(fork_event); 737 free(fork_event);
@@ -748,6 +743,118 @@ out:
748 return err; 743 return err;
749} 744}
750 745
746struct synthesize_threads_arg {
747 struct perf_tool *tool;
748 perf_event__handler_t process;
749 struct machine *machine;
750 bool mmap_data;
751 unsigned int proc_map_timeout;
752 struct dirent **dirent;
753 int num;
754 int start;
755};
756
757static void *synthesize_threads_worker(void *arg)
758{
759 struct synthesize_threads_arg *args = arg;
760
761 __perf_event__synthesize_threads(args->tool, args->process,
762 args->machine, args->mmap_data,
763 args->proc_map_timeout, args->dirent,
764 args->start, args->num);
765 return NULL;
766}
767
768int perf_event__synthesize_threads(struct perf_tool *tool,
769 perf_event__handler_t process,
770 struct machine *machine,
771 bool mmap_data,
772 unsigned int proc_map_timeout,
773 unsigned int nr_threads_synthesize)
774{
775 struct synthesize_threads_arg *args = NULL;
776 pthread_t *synthesize_threads = NULL;
777 char proc_path[PATH_MAX];
778 struct dirent **dirent;
779 int num_per_thread;
780 int m, n, i, j;
781 int thread_nr;
782 int base = 0;
783 int err = -1;
784
785
786 if (machine__is_default_guest(machine))
787 return 0;
788
789 snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
790 n = scandir(proc_path, &dirent, 0, alphasort);
791 if (n < 0)
792 return err;
793
794 if (nr_threads_synthesize == UINT_MAX)
795 thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
796 else
797 thread_nr = nr_threads_synthesize;
798
799 if (thread_nr <= 1) {
800 err = __perf_event__synthesize_threads(tool, process,
801 machine, mmap_data,
802 proc_map_timeout,
803 dirent, base, n);
804 goto free_dirent;
805 }
806 if (thread_nr > n)
807 thread_nr = n;
808
809 synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
810 if (synthesize_threads == NULL)
811 goto free_dirent;
812
813 args = calloc(sizeof(*args), thread_nr);
814 if (args == NULL)
815 goto free_threads;
816
817 num_per_thread = n / thread_nr;
818 m = n % thread_nr;
819 for (i = 0; i < thread_nr; i++) {
820 args[i].tool = tool;
821 args[i].process = process;
822 args[i].machine = machine;
823 args[i].mmap_data = mmap_data;
824 args[i].proc_map_timeout = proc_map_timeout;
825 args[i].dirent = dirent;
826 }
827 for (i = 0; i < m; i++) {
828 args[i].num = num_per_thread + 1;
829 args[i].start = i * args[i].num;
830 }
831 if (i != 0)
832 base = args[i-1].start + args[i-1].num;
833 for (j = i; j < thread_nr; j++) {
834 args[j].num = num_per_thread;
835 args[j].start = base + (j - i) * args[i].num;
836 }
837
838 for (i = 0; i < thread_nr; i++) {
839 if (pthread_create(&synthesize_threads[i], NULL,
840 synthesize_threads_worker, &args[i]))
841 goto out_join;
842 }
843 err = 0;
844out_join:
845 for (i = 0; i < thread_nr; i++)
846 pthread_join(synthesize_threads[i], NULL);
847 free(args);
848free_threads:
849 free(synthesize_threads);
850free_dirent:
851 for (i = 0; i < n; i++)
852 free(dirent[i]);
853 free(dirent);
854
855 return err;
856}
857
751struct process_symbol_args { 858struct process_symbol_args {
752 const char *name; 859 const char *name;
753 u64 start; 860 u64 start;
@@ -1498,6 +1605,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
1498 al->sym = NULL; 1605 al->sym = NULL;
1499 al->cpu = sample->cpu; 1606 al->cpu = sample->cpu;
1500 al->socket = -1; 1607 al->socket = -1;
1608 al->srcline = NULL;
1501 1609
1502 if (al->cpu >= 0) { 1610 if (al->cpu >= 0) {
1503 struct perf_env *env = machine->env; 1611 struct perf_env *env = machine->env;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 5524ee69279c..1ae95efbfb95 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -681,7 +681,8 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool,
681int perf_event__synthesize_threads(struct perf_tool *tool, 681int perf_event__synthesize_threads(struct perf_tool *tool,
682 perf_event__handler_t process, 682 perf_event__handler_t process,
683 struct machine *machine, bool mmap_data, 683 struct machine *machine, bool mmap_data,
684 unsigned int proc_map_timeout); 684 unsigned int proc_map_timeout,
685 unsigned int nr_threads_synthesize);
685int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, 686int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
686 perf_event__handler_t process, 687 perf_event__handler_t process,
687 struct machine *machine); 688 struct machine *machine);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6a0d7ffbeba0..c6c891e154a6 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -33,9 +33,6 @@
33#include <linux/log2.h> 33#include <linux/log2.h>
34#include <linux/err.h> 34#include <linux/err.h>
35 35
36static void perf_mmap__munmap(struct perf_mmap *map);
37static void perf_mmap__put(struct perf_mmap *map);
38
39#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 36#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
40#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 37#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
41 38
@@ -704,129 +701,6 @@ static int perf_evlist__resume(struct perf_evlist *evlist)
704 return perf_evlist__set_paused(evlist, false); 701 return perf_evlist__set_paused(evlist, false);
705} 702}
706 703
707/* When check_messup is true, 'end' must points to a good entry */
708static union perf_event *
709perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
710 u64 end, u64 *prev)
711{
712 unsigned char *data = md->base + page_size;
713 union perf_event *event = NULL;
714 int diff = end - start;
715
716 if (check_messup) {
717 /*
718 * If we're further behind than half the buffer, there's a chance
719 * the writer will bite our tail and mess up the samples under us.
720 *
721 * If we somehow ended up ahead of the 'end', we got messed up.
722 *
723 * In either case, truncate and restart at 'end'.
724 */
725 if (diff > md->mask / 2 || diff < 0) {
726 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
727
728 /*
729 * 'end' points to a known good entry, start there.
730 */
731 start = end;
732 diff = 0;
733 }
734 }
735
736 if (diff >= (int)sizeof(event->header)) {
737 size_t size;
738
739 event = (union perf_event *)&data[start & md->mask];
740 size = event->header.size;
741
742 if (size < sizeof(event->header) || diff < (int)size) {
743 event = NULL;
744 goto broken_event;
745 }
746
747 /*
748 * Event straddles the mmap boundary -- header should always
749 * be inside due to u64 alignment of output.
750 */
751 if ((start & md->mask) + size != ((start + size) & md->mask)) {
752 unsigned int offset = start;
753 unsigned int len = min(sizeof(*event), size), cpy;
754 void *dst = md->event_copy;
755
756 do {
757 cpy = min(md->mask + 1 - (offset & md->mask), len);
758 memcpy(dst, &data[offset & md->mask], cpy);
759 offset += cpy;
760 dst += cpy;
761 len -= cpy;
762 } while (len);
763
764 event = (union perf_event *) md->event_copy;
765 }
766
767 start += size;
768 }
769
770broken_event:
771 if (prev)
772 *prev = start;
773
774 return event;
775}
776
777union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup)
778{
779 u64 head;
780 u64 old = md->prev;
781
782 /*
783 * Check if event was unmapped due to a POLLHUP/POLLERR.
784 */
785 if (!refcount_read(&md->refcnt))
786 return NULL;
787
788 head = perf_mmap__read_head(md);
789
790 return perf_mmap__read(md, check_messup, old, head, &md->prev);
791}
792
793union perf_event *
794perf_mmap__read_backward(struct perf_mmap *md)
795{
796 u64 head, end;
797 u64 start = md->prev;
798
799 /*
800 * Check if event was unmapped due to a POLLHUP/POLLERR.
801 */
802 if (!refcount_read(&md->refcnt))
803 return NULL;
804
805 head = perf_mmap__read_head(md);
806 if (!head)
807 return NULL;
808
809 /*
810 * 'head' pointer starts from 0. Kernel minus sizeof(record) form
811 * it each time when kernel writes to it, so in fact 'head' is
812 * negative. 'end' pointer is made manually by adding the size of
813 * the ring buffer to 'head' pointer, means the validate data can
814 * read is the whole ring buffer. If 'end' is positive, the ring
815 * buffer has not fully filled, so we must adjust 'end' to 0.
816 *
817 * However, since both 'head' and 'end' is unsigned, we can't
818 * simply compare 'end' against 0. Here we compare '-head' and
819 * the size of the ring buffer, where -head is the number of bytes
820 * kernel write to the ring buffer.
821 */
822 if (-head < (u64)(md->mask + 1))
823 end = 0;
824 else
825 end = head + md->mask + 1;
826
827 return perf_mmap__read(md, false, start, end, &md->prev);
828}
829
830union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) 704union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
831{ 705{
832 struct perf_mmap *md = &evlist->mmap[idx]; 706 struct perf_mmap *md = &evlist->mmap[idx];
@@ -857,96 +731,16 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
857 return perf_evlist__mmap_read_forward(evlist, idx); 731 return perf_evlist__mmap_read_forward(evlist, idx);
858} 732}
859 733
860void perf_mmap__read_catchup(struct perf_mmap *md)
861{
862 u64 head;
863
864 if (!refcount_read(&md->refcnt))
865 return;
866
867 head = perf_mmap__read_head(md);
868 md->prev = head;
869}
870
871void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) 734void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
872{ 735{
873 perf_mmap__read_catchup(&evlist->mmap[idx]); 736 perf_mmap__read_catchup(&evlist->mmap[idx]);
874} 737}
875 738
876static bool perf_mmap__empty(struct perf_mmap *md)
877{
878 return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
879}
880
881static void perf_mmap__get(struct perf_mmap *map)
882{
883 refcount_inc(&map->refcnt);
884}
885
886static void perf_mmap__put(struct perf_mmap *md)
887{
888 BUG_ON(md->base && refcount_read(&md->refcnt) == 0);
889
890 if (refcount_dec_and_test(&md->refcnt))
891 perf_mmap__munmap(md);
892}
893
894void perf_mmap__consume(struct perf_mmap *md, bool overwrite)
895{
896 if (!overwrite) {
897 u64 old = md->prev;
898
899 perf_mmap__write_tail(md, old);
900 }
901
902 if (refcount_read(&md->refcnt) == 1 && perf_mmap__empty(md))
903 perf_mmap__put(md);
904}
905
906void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) 739void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
907{ 740{
908 perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); 741 perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite);
909} 742}
910 743
911int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
912 struct auxtrace_mmap_params *mp __maybe_unused,
913 void *userpg __maybe_unused,
914 int fd __maybe_unused)
915{
916 return 0;
917}
918
919void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
920{
921}
922
923void __weak auxtrace_mmap_params__init(
924 struct auxtrace_mmap_params *mp __maybe_unused,
925 off_t auxtrace_offset __maybe_unused,
926 unsigned int auxtrace_pages __maybe_unused,
927 bool auxtrace_overwrite __maybe_unused)
928{
929}
930
931void __weak auxtrace_mmap_params__set_idx(
932 struct auxtrace_mmap_params *mp __maybe_unused,
933 struct perf_evlist *evlist __maybe_unused,
934 int idx __maybe_unused,
935 bool per_cpu __maybe_unused)
936{
937}
938
939static void perf_mmap__munmap(struct perf_mmap *map)
940{
941 if (map->base != NULL) {
942 munmap(map->base, perf_mmap__mmap_len(map));
943 map->base = NULL;
944 map->fd = -1;
945 refcount_set(&map->refcnt, 0);
946 }
947 auxtrace_mmap__munmap(&map->auxtrace_mmap);
948}
949
950static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) 744static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
951{ 745{
952 int i; 746 int i;
@@ -995,48 +789,6 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
995 return map; 789 return map;
996} 790}
997 791
998struct mmap_params {
999 int prot;
1000 int mask;
1001 struct auxtrace_mmap_params auxtrace_mp;
1002};
1003
1004static int perf_mmap__mmap(struct perf_mmap *map,
1005 struct mmap_params *mp, int fd)
1006{
1007 /*
1008 * The last one will be done at perf_evlist__mmap_consume(), so that we
1009 * make sure we don't prevent tools from consuming every last event in
1010 * the ring buffer.
1011 *
1012 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
1013 * anymore, but the last events for it are still in the ring buffer,
1014 * waiting to be consumed.
1015 *
1016 * Tools can chose to ignore this at their own discretion, but the
1017 * evlist layer can't just drop it when filtering events in
1018 * perf_evlist__filter_pollfd().
1019 */
1020 refcount_set(&map->refcnt, 2);
1021 map->prev = 0;
1022 map->mask = mp->mask;
1023 map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
1024 MAP_SHARED, fd, 0);
1025 if (map->base == MAP_FAILED) {
1026 pr_debug2("failed to mmap perf event ring buffer, error %d\n",
1027 errno);
1028 map->base = NULL;
1029 return -1;
1030 }
1031 map->fd = fd;
1032
1033 if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
1034 &mp->auxtrace_mp, map->base, fd))
1035 return -1;
1036
1037 return 0;
1038}
1039
1040static bool 792static bool
1041perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, 793perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
1042 struct perf_evsel *evsel) 794 struct perf_evsel *evsel)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index c1750a400bb7..e72ae64c11ac 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -7,12 +7,13 @@
7#include <linux/refcount.h> 7#include <linux/refcount.h>
8#include <linux/list.h> 8#include <linux/list.h>
9#include <api/fd/array.h> 9#include <api/fd/array.h>
10#include <fcntl.h>
10#include <stdio.h> 11#include <stdio.h>
11#include "../perf.h" 12#include "../perf.h"
12#include "event.h" 13#include "event.h"
13#include "evsel.h" 14#include "evsel.h"
15#include "mmap.h"
14#include "util.h" 16#include "util.h"
15#include "auxtrace.h"
16#include <signal.h> 17#include <signal.h>
17#include <unistd.h> 18#include <unistd.h>
18 19
@@ -24,55 +25,6 @@ struct record_opts;
24#define PERF_EVLIST__HLIST_BITS 8 25#define PERF_EVLIST__HLIST_BITS 8
25#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) 26#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
26 27
27/**
28 * struct perf_mmap - perf's ring buffer mmap details
29 *
30 * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
31 */
32struct perf_mmap {
33 void *base;
34 int mask;
35 int fd;
36 refcount_t refcnt;
37 u64 prev;
38 struct auxtrace_mmap auxtrace_mmap;
39 char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
40};
41
42static inline size_t
43perf_mmap__mmap_len(struct perf_mmap *map)
44{
45 return map->mask + 1 + page_size;
46}
47
48/*
49 * State machine of bkw_mmap_state:
50 *
51 * .________________(forbid)_____________.
52 * | V
53 * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
54 * ^ ^ | ^ |
55 * | |__(forbid)____/ |___(forbid)___/|
56 * | |
57 * \_________________(3)_______________/
58 *
59 * NOTREADY : Backward ring buffers are not ready
60 * RUNNING : Backward ring buffers are recording
61 * DATA_PENDING : We are required to collect data from backward ring buffers
62 * EMPTY : We have collected data from backward ring buffers.
63 *
64 * (0): Setup backward ring buffer
65 * (1): Pause ring buffers for reading
66 * (2): Read from ring buffers
67 * (3): Resume ring buffers for recording
68 */
69enum bkw_mmap_state {
70 BKW_MMAP_NOTREADY,
71 BKW_MMAP_RUNNING,
72 BKW_MMAP_DATA_PENDING,
73 BKW_MMAP_EMPTY,
74};
75
76struct perf_evlist { 28struct perf_evlist {
77 struct list_head entries; 29 struct list_head entries;
78 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; 30 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
@@ -177,12 +129,6 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
177 129
178void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state); 130void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state);
179 131
180union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup);
181union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
182
183void perf_mmap__read_catchup(struct perf_mmap *md);
184void perf_mmap__consume(struct perf_mmap *md, bool overwrite);
185
186union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); 132union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
187 133
188union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, 134union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
@@ -286,25 +232,6 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
286int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); 232int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
287int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); 233int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
288 234
289static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
290{
291 struct perf_event_mmap_page *pc = mm->base;
292 u64 head = ACCESS_ONCE(pc->data_head);
293 rmb();
294 return head;
295}
296
297static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
298{
299 struct perf_event_mmap_page *pc = md->base;
300
301 /*
302 * ensure all reads are done before we write the tail out.
303 */
304 mb();
305 pc->data_tail = tail;
306}
307
308bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str); 235bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str);
309void perf_evlist__to_front(struct perf_evlist *evlist, 236void perf_evlist__to_front(struct perf_evlist *evlist,
310 struct perf_evsel *move_evsel); 237 struct perf_evsel *move_evsel);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0dccdb89572c..f894893c203d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -683,7 +683,7 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel,
683 if (!function) { 683 if (!function) {
684 perf_evsel__set_sample_bit(evsel, REGS_USER); 684 perf_evsel__set_sample_bit(evsel, REGS_USER);
685 perf_evsel__set_sample_bit(evsel, STACK_USER); 685 perf_evsel__set_sample_bit(evsel, STACK_USER);
686 attr->sample_regs_user = PERF_REGS_MASK; 686 attr->sample_regs_user |= PERF_REGS_MASK;
687 attr->sample_stack_user = param->dump_size; 687 attr->sample_stack_user = param->dump_size;
688 attr->exclude_callchain_user = 1; 688 attr->exclude_callchain_user = 1;
689 } else { 689 } else {
@@ -936,6 +936,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
936 perf_evsel__set_sample_bit(evsel, REGS_INTR); 936 perf_evsel__set_sample_bit(evsel, REGS_INTR);
937 } 937 }
938 938
939 if (opts->sample_user_regs) {
940 attr->sample_regs_user |= opts->sample_user_regs;
941 perf_evsel__set_sample_bit(evsel, REGS_USER);
942 }
943
939 if (target__has_cpu(&opts->target) || opts->sample_cpu) 944 if (target__has_cpu(&opts->target) || opts->sample_cpu)
940 perf_evsel__set_sample_bit(evsel, CPU); 945 perf_evsel__set_sample_bit(evsel, CPU);
941 946
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b4df79d72329..9277df96ffda 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -69,6 +69,8 @@ struct perf_evsel_config_term {
69 } val; 69 } val;
70}; 70};
71 71
72struct perf_stat_evsel;
73
72/** struct perf_evsel - event selector 74/** struct perf_evsel - event selector
73 * 75 *
74 * @evlist - evlist this evsel is in, if it is in one. 76 * @evlist - evlist this evsel is in, if it is in one.
@@ -102,6 +104,7 @@ struct perf_evsel {
102 const char *unit; 104 const char *unit;
103 struct event_format *tp_format; 105 struct event_format *tp_format;
104 off_t id_offset; 106 off_t id_offset;
107 struct perf_stat_evsel *stats;
105 void *priv; 108 void *priv;
106 u64 db_id; 109 u64 db_id;
107 struct cgroup_sel *cgrp; 110 struct cgroup_sel *cgrp;
@@ -138,6 +141,7 @@ struct perf_evsel {
138 const char * metric_name; 141 const char * metric_name;
139 struct perf_evsel **metric_events; 142 struct perf_evsel **metric_events;
140 bool collect_stat; 143 bool collect_stat;
144 bool weak_group;
141}; 145};
142 146
143union u64_swap { 147union u64_swap {
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 1fd7c2e46db2..06dfb027879d 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -158,7 +158,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
158 } 158 }
159 } 159 }
160 160
161 if (print_dso) { 161 if (print_dso && (!node->sym || !node->sym->inlined)) {
162 printed += fprintf(fp, " ("); 162 printed += fprintf(fp, " (");
163 printed += map__fprintf_dsoname(node->map, fp); 163 printed += map__fprintf_dsoname(node->map, fp);
164 printed += fprintf(fp, ")"); 164 printed += fprintf(fp, ")");
@@ -167,41 +167,12 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
167 if (print_srcline) 167 if (print_srcline)
168 printed += map__fprintf_srcline(node->map, addr, "\n ", fp); 168 printed += map__fprintf_srcline(node->map, addr, "\n ", fp);
169 169
170 if (node->sym && node->sym->inlined)
171 printed += fprintf(fp, " (inlined)");
172
170 if (!print_oneline) 173 if (!print_oneline)
171 printed += fprintf(fp, "\n"); 174 printed += fprintf(fp, "\n");
172 175
173 if (symbol_conf.inline_name && node->map) {
174 struct inline_node *inode;
175
176 addr = map__rip_2objdump(node->map, node->ip),
177 inode = dso__parse_addr_inlines(node->map->dso, addr);
178
179 if (inode) {
180 struct inline_list *ilist;
181
182 list_for_each_entry(ilist, &inode->val, list) {
183 if (print_arrow)
184 printed += fprintf(fp, " <-");
185
186 /* IP is same, just skip it */
187 if (print_ip)
188 printed += fprintf(fp, "%c%16s",
189 s, "");
190 if (print_sym)
191 printed += fprintf(fp, " %s",
192 ilist->funcname);
193 if (print_srcline)
194 printed += fprintf(fp, "\n %s:%d",
195 ilist->filename,
196 ilist->line_nr);
197 if (!print_oneline)
198 printed += fprintf(fp, "\n");
199 }
200
201 inline_node__delete(inode);
202 }
203 }
204
205 if (symbol_conf.bt_stop_list && 176 if (symbol_conf.bt_stop_list &&
206 node->sym && 177 node->sym &&
207 strlist__has_entry(symbol_conf.bt_stop_list, 178 strlist__has_entry(symbol_conf.bt_stop_list,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ba0cea8fef72..7c0e9d587bfa 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1763,7 +1763,7 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused)
1763 1763
1764 session = container_of(ff->ph, struct perf_session, header); 1764 session = container_of(ff->ph, struct perf_session, header);
1765 1765
1766 if (session->file->is_pipe) { 1766 if (session->data->is_pipe) {
1767 /* Save events for reading later by print_event_desc, 1767 /* Save events for reading later by print_event_desc,
1768 * since they can't be read again in pipe mode. */ 1768 * since they can't be read again in pipe mode. */
1769 ff->events = events; 1769 ff->events = events;
@@ -1772,7 +1772,7 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused)
1772 for (evsel = events; evsel->attr.size; evsel++) 1772 for (evsel = events; evsel->attr.size; evsel++)
1773 perf_evlist__set_event_name(session->evlist, evsel); 1773 perf_evlist__set_event_name(session->evlist, evsel);
1774 1774
1775 if (!session->file->is_pipe) 1775 if (!session->data->is_pipe)
1776 free_event_desc(events); 1776 free_event_desc(events);
1777 1777
1778 return 0; 1778 return 0;
@@ -2249,7 +2249,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
2249{ 2249{
2250 struct header_print_data hd; 2250 struct header_print_data hd;
2251 struct perf_header *header = &session->header; 2251 struct perf_header *header = &session->header;
2252 int fd = perf_data_file__fd(session->file); 2252 int fd = perf_data__fd(session->data);
2253 struct stat st; 2253 struct stat st;
2254 int ret, bit; 2254 int ret, bit;
2255 2255
@@ -2265,7 +2265,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
2265 perf_header__process_sections(header, fd, &hd, 2265 perf_header__process_sections(header, fd, &hd,
2266 perf_file_section__fprintf_info); 2266 perf_file_section__fprintf_info);
2267 2267
2268 if (session->file->is_pipe) 2268 if (session->data->is_pipe)
2269 return 0; 2269 return 0;
2270 2270
2271 fprintf(fp, "# missing features: "); 2271 fprintf(fp, "# missing features: ");
@@ -2758,7 +2758,7 @@ static int perf_header__read_pipe(struct perf_session *session)
2758 struct perf_pipe_file_header f_header; 2758 struct perf_pipe_file_header f_header;
2759 2759
2760 if (perf_file_header__read_pipe(&f_header, header, 2760 if (perf_file_header__read_pipe(&f_header, header,
2761 perf_data_file__fd(session->file), 2761 perf_data__fd(session->data),
2762 session->repipe) < 0) { 2762 session->repipe) < 0) {
2763 pr_debug("incompatible file format\n"); 2763 pr_debug("incompatible file format\n");
2764 return -EINVAL; 2764 return -EINVAL;
@@ -2861,13 +2861,13 @@ static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist,
2861 2861
2862int perf_session__read_header(struct perf_session *session) 2862int perf_session__read_header(struct perf_session *session)
2863{ 2863{
2864 struct perf_data_file *file = session->file; 2864 struct perf_data *data = session->data;
2865 struct perf_header *header = &session->header; 2865 struct perf_header *header = &session->header;
2866 struct perf_file_header f_header; 2866 struct perf_file_header f_header;
2867 struct perf_file_attr f_attr; 2867 struct perf_file_attr f_attr;
2868 u64 f_id; 2868 u64 f_id;
2869 int nr_attrs, nr_ids, i, j; 2869 int nr_attrs, nr_ids, i, j;
2870 int fd = perf_data_file__fd(file); 2870 int fd = perf_data__fd(data);
2871 2871
2872 session->evlist = perf_evlist__new(); 2872 session->evlist = perf_evlist__new();
2873 if (session->evlist == NULL) 2873 if (session->evlist == NULL)
@@ -2875,7 +2875,7 @@ int perf_session__read_header(struct perf_session *session)
2875 2875
2876 session->evlist->env = &header->env; 2876 session->evlist->env = &header->env;
2877 session->machines.host.env = &header->env; 2877 session->machines.host.env = &header->env;
2878 if (perf_data_file__is_pipe(file)) 2878 if (perf_data__is_pipe(data))
2879 return perf_header__read_pipe(session); 2879 return perf_header__read_pipe(session);
2880 2880
2881 if (perf_file_header__read(&f_header, header, fd) < 0) 2881 if (perf_file_header__read(&f_header, header, fd) < 0)
@@ -2890,7 +2890,7 @@ int perf_session__read_header(struct perf_session *session)
2890 if (f_header.data.size == 0) { 2890 if (f_header.data.size == 0) {
2891 pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n" 2891 pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n"
2892 "Was the 'perf record' command properly terminated?\n", 2892 "Was the 'perf record' command properly terminated?\n",
2893 file->path); 2893 data->file.path);
2894 } 2894 }
2895 2895
2896 nr_attrs = f_header.attrs.size / f_header.attr_size; 2896 nr_attrs = f_header.attrs.size / f_header.attr_size;
@@ -3398,7 +3398,7 @@ int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused,
3398 struct perf_session *session) 3398 struct perf_session *session)
3399{ 3399{
3400 ssize_t size_read, padding, size = event->tracing_data.size; 3400 ssize_t size_read, padding, size = event->tracing_data.size;
3401 int fd = perf_data_file__fd(session->file); 3401 int fd = perf_data__fd(session->data);
3402 off_t offset = lseek(fd, 0, SEEK_CUR); 3402 off_t offset = lseek(fd, 0, SEEK_CUR);
3403 char buf[BUFSIZ]; 3403 char buf[BUFSIZ];
3404 3404
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 097473600d94..b6140950301e 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -597,6 +597,7 @@ __hists__add_entry(struct hists *hists,
597 .map = al->map, 597 .map = al->map,
598 .sym = al->sym, 598 .sym = al->sym,
599 }, 599 },
600 .srcline = al->srcline ? strdup(al->srcline) : NULL,
600 .socket = al->socket, 601 .socket = al->socket,
601 .cpu = al->cpu, 602 .cpu = al->cpu,
602 .cpumode = al->cpumode, 603 .cpumode = al->cpumode,
@@ -951,6 +952,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
951 .map = al->map, 952 .map = al->map,
952 .sym = al->sym, 953 .sym = al->sym,
953 }, 954 },
955 .srcline = al->srcline ? strdup(al->srcline) : NULL,
954 .parent = iter->parent, 956 .parent = iter->parent,
955 .raw_data = sample->raw_data, 957 .raw_data = sample->raw_data,
956 .raw_size = sample->raw_size, 958 .raw_size = sample->raw_size,
@@ -1142,11 +1144,6 @@ void hist_entry__delete(struct hist_entry *he)
1142 zfree(&he->mem_info); 1144 zfree(&he->mem_info);
1143 } 1145 }
1144 1146
1145 if (he->inline_node) {
1146 inline_node__delete(he->inline_node);
1147 he->inline_node = NULL;
1148 }
1149
1150 zfree(&he->stat_acc); 1147 zfree(&he->stat_acc);
1151 free_srcline(he->srcline); 1148 free_srcline(he->srcline);
1152 if (he->srcfile && he->srcfile[0]) 1149 if (he->srcfile && he->srcfile[0])
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 218ee2bac9a5..5325e65f9711 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -500,7 +500,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
500 } 500 }
501 501
502 if (!buffer->data) { 502 if (!buffer->data) {
503 int fd = perf_data_file__fd(btsq->bts->session->file); 503 int fd = perf_data__fd(btsq->bts->session->data);
504 504
505 buffer->data = auxtrace_buffer__get_data(buffer, fd); 505 buffer->data = auxtrace_buffer__get_data(buffer, fd);
506 if (!buffer->data) { 506 if (!buffer->data) {
@@ -664,10 +664,10 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session,
664 if (!bts->data_queued) { 664 if (!bts->data_queued) {
665 struct auxtrace_buffer *buffer; 665 struct auxtrace_buffer *buffer;
666 off_t data_offset; 666 off_t data_offset;
667 int fd = perf_data_file__fd(session->file); 667 int fd = perf_data__fd(session->data);
668 int err; 668 int err;
669 669
670 if (perf_data_file__is_pipe(session->file)) { 670 if (perf_data__is_pipe(session->data)) {
671 data_offset = 0; 671 data_offset = 0;
672 } else { 672 } else {
673 data_offset = lseek(fd, 0, SEEK_CUR); 673 data_offset = lseek(fd, 0, SEEK_CUR);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index b58f9fd1e2ee..23f9ba676df0 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -271,7 +271,7 @@ next:
271 ptq->buffer = buffer; 271 ptq->buffer = buffer;
272 272
273 if (!buffer->data) { 273 if (!buffer->data) {
274 int fd = perf_data_file__fd(ptq->pt->session->file); 274 int fd = perf_data__fd(ptq->pt->session->data);
275 275
276 buffer->data = auxtrace_buffer__get_data(buffer, fd); 276 buffer->data = auxtrace_buffer__get_data(buffer, fd);
277 if (!buffer->data) 277 if (!buffer->data)
@@ -2084,10 +2084,10 @@ static int intel_pt_process_auxtrace_event(struct perf_session *session,
2084 if (!pt->data_queued) { 2084 if (!pt->data_queued) {
2085 struct auxtrace_buffer *buffer; 2085 struct auxtrace_buffer *buffer;
2086 off_t data_offset; 2086 off_t data_offset;
2087 int fd = perf_data_file__fd(session->file); 2087 int fd = perf_data__fd(session->data);
2088 int err; 2088 int err;
2089 2089
2090 if (perf_data_file__is_pipe(session->file)) { 2090 if (perf_data__is_pipe(session->data)) {
2091 data_offset = 0; 2091 data_offset = 0;
2092 } else { 2092 } else {
2093 data_offset = lseek(fd, 0, SEEK_CUR); 2093 data_offset = lseek(fd, 0, SEEK_CUR);
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
index c2582fa9fe21..6817ffc2a059 100644
--- a/tools/perf/util/jit.h
+++ b/tools/perf/util/jit.h
@@ -4,7 +4,7 @@
4 4
5#include <data.h> 5#include <data.h>
6 6
7int jit_process(struct perf_session *session, struct perf_data_file *output, 7int jit_process(struct perf_session *session, struct perf_data *output,
8 struct machine *machine, char *filename, pid_t pid, u64 *nbytes); 8 struct machine *machine, char *filename, pid_t pid, u64 *nbytes);
9 9
10int jit_inject_record(const char *filename); 10int jit_inject_record(const char *filename);
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 36483db032e8..a1863000e972 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -30,7 +30,7 @@
30#include "sane_ctype.h" 30#include "sane_ctype.h"
31 31
32struct jit_buf_desc { 32struct jit_buf_desc {
33 struct perf_data_file *output; 33 struct perf_data *output;
34 struct perf_session *session; 34 struct perf_session *session;
35 struct machine *machine; 35 struct machine *machine;
36 union jr_entry *entry; 36 union jr_entry *entry;
@@ -61,8 +61,8 @@ struct debug_line_info {
61 61
62struct jit_tool { 62struct jit_tool {
63 struct perf_tool tool; 63 struct perf_tool tool;
64 struct perf_data_file output; 64 struct perf_data output;
65 struct perf_data_file input; 65 struct perf_data input;
66 u64 bytes_written; 66 u64 bytes_written;
67}; 67};
68 68
@@ -357,7 +357,7 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
357{ 357{
358 ssize_t size; 358 ssize_t size;
359 359
360 size = perf_data_file__write(jd->output, event, event->header.size); 360 size = perf_data__write(jd->output, event, event->header.size);
361 if (size < 0) 361 if (size < 0)
362 return -1; 362 return -1;
363 363
@@ -752,7 +752,7 @@ jit_detect(char *mmap_name, pid_t pid)
752 752
753int 753int
754jit_process(struct perf_session *session, 754jit_process(struct perf_session *session,
755 struct perf_data_file *output, 755 struct perf_data *output,
756 struct machine *machine, 756 struct machine *machine,
757 char *filename, 757 char *filename,
758 pid_t pid, 758 pid_t pid,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index bd5d5b5e2218..6a8d03c3d9b7 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -31,7 +31,21 @@ static void dsos__init(struct dsos *dsos)
31{ 31{
32 INIT_LIST_HEAD(&dsos->head); 32 INIT_LIST_HEAD(&dsos->head);
33 dsos->root = RB_ROOT; 33 dsos->root = RB_ROOT;
34 pthread_rwlock_init(&dsos->lock, NULL); 34 init_rwsem(&dsos->lock);
35}
36
37static void machine__threads_init(struct machine *machine)
38{
39 int i;
40
41 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
42 struct threads *threads = &machine->threads[i];
43 threads->entries = RB_ROOT;
44 init_rwsem(&threads->lock);
45 threads->nr = 0;
46 INIT_LIST_HEAD(&threads->dead);
47 threads->last_match = NULL;
48 }
35} 49}
36 50
37int machine__init(struct machine *machine, const char *root_dir, pid_t pid) 51int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
@@ -41,11 +55,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
41 RB_CLEAR_NODE(&machine->rb_node); 55 RB_CLEAR_NODE(&machine->rb_node);
42 dsos__init(&machine->dsos); 56 dsos__init(&machine->dsos);
43 57
44 machine->threads = RB_ROOT; 58 machine__threads_init(machine);
45 pthread_rwlock_init(&machine->threads_lock, NULL);
46 machine->nr_threads = 0;
47 INIT_LIST_HEAD(&machine->dead_threads);
48 machine->last_match = NULL;
49 59
50 machine->vdso_info = NULL; 60 machine->vdso_info = NULL;
51 machine->env = NULL; 61 machine->env = NULL;
@@ -121,7 +131,7 @@ static void dsos__purge(struct dsos *dsos)
121{ 131{
122 struct dso *pos, *n; 132 struct dso *pos, *n;
123 133
124 pthread_rwlock_wrlock(&dsos->lock); 134 down_write(&dsos->lock);
125 135
126 list_for_each_entry_safe(pos, n, &dsos->head, node) { 136 list_for_each_entry_safe(pos, n, &dsos->head, node) {
127 RB_CLEAR_NODE(&pos->rb_node); 137 RB_CLEAR_NODE(&pos->rb_node);
@@ -130,39 +140,49 @@ static void dsos__purge(struct dsos *dsos)
130 dso__put(pos); 140 dso__put(pos);
131 } 141 }
132 142
133 pthread_rwlock_unlock(&dsos->lock); 143 up_write(&dsos->lock);
134} 144}
135 145
136static void dsos__exit(struct dsos *dsos) 146static void dsos__exit(struct dsos *dsos)
137{ 147{
138 dsos__purge(dsos); 148 dsos__purge(dsos);
139 pthread_rwlock_destroy(&dsos->lock); 149 exit_rwsem(&dsos->lock);
140} 150}
141 151
142void machine__delete_threads(struct machine *machine) 152void machine__delete_threads(struct machine *machine)
143{ 153{
144 struct rb_node *nd; 154 struct rb_node *nd;
155 int i;
145 156
146 pthread_rwlock_wrlock(&machine->threads_lock); 157 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
147 nd = rb_first(&machine->threads); 158 struct threads *threads = &machine->threads[i];
148 while (nd) { 159 down_write(&threads->lock);
149 struct thread *t = rb_entry(nd, struct thread, rb_node); 160 nd = rb_first(&threads->entries);
161 while (nd) {
162 struct thread *t = rb_entry(nd, struct thread, rb_node);
150 163
151 nd = rb_next(nd); 164 nd = rb_next(nd);
152 __machine__remove_thread(machine, t, false); 165 __machine__remove_thread(machine, t, false);
166 }
167 up_write(&threads->lock);
153 } 168 }
154 pthread_rwlock_unlock(&machine->threads_lock);
155} 169}
156 170
157void machine__exit(struct machine *machine) 171void machine__exit(struct machine *machine)
158{ 172{
173 int i;
174
159 machine__destroy_kernel_maps(machine); 175 machine__destroy_kernel_maps(machine);
160 map_groups__exit(&machine->kmaps); 176 map_groups__exit(&machine->kmaps);
161 dsos__exit(&machine->dsos); 177 dsos__exit(&machine->dsos);
162 machine__exit_vdso(machine); 178 machine__exit_vdso(machine);
163 zfree(&machine->root_dir); 179 zfree(&machine->root_dir);
164 zfree(&machine->current_tid); 180 zfree(&machine->current_tid);
165 pthread_rwlock_destroy(&machine->threads_lock); 181
182 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
183 struct threads *threads = &machine->threads[i];
184 exit_rwsem(&threads->lock);
185 }
166} 186}
167 187
168void machine__delete(struct machine *machine) 188void machine__delete(struct machine *machine)
@@ -380,10 +400,11 @@ out_err:
380 * lookup/new thread inserted. 400 * lookup/new thread inserted.
381 */ 401 */
382static struct thread *____machine__findnew_thread(struct machine *machine, 402static struct thread *____machine__findnew_thread(struct machine *machine,
403 struct threads *threads,
383 pid_t pid, pid_t tid, 404 pid_t pid, pid_t tid,
384 bool create) 405 bool create)
385{ 406{
386 struct rb_node **p = &machine->threads.rb_node; 407 struct rb_node **p = &threads->entries.rb_node;
387 struct rb_node *parent = NULL; 408 struct rb_node *parent = NULL;
388 struct thread *th; 409 struct thread *th;
389 410
@@ -392,14 +413,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
392 * so most of the time we dont have to look up 413 * so most of the time we dont have to look up
393 * the full rbtree: 414 * the full rbtree:
394 */ 415 */
395 th = machine->last_match; 416 th = threads->last_match;
396 if (th != NULL) { 417 if (th != NULL) {
397 if (th->tid == tid) { 418 if (th->tid == tid) {
398 machine__update_thread_pid(machine, th, pid); 419 machine__update_thread_pid(machine, th, pid);
399 return thread__get(th); 420 return thread__get(th);
400 } 421 }
401 422
402 machine->last_match = NULL; 423 threads->last_match = NULL;
403 } 424 }
404 425
405 while (*p != NULL) { 426 while (*p != NULL) {
@@ -407,7 +428,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
407 th = rb_entry(parent, struct thread, rb_node); 428 th = rb_entry(parent, struct thread, rb_node);
408 429
409 if (th->tid == tid) { 430 if (th->tid == tid) {
410 machine->last_match = th; 431 threads->last_match = th;
411 machine__update_thread_pid(machine, th, pid); 432 machine__update_thread_pid(machine, th, pid);
412 return thread__get(th); 433 return thread__get(th);
413 } 434 }
@@ -424,7 +445,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
424 th = thread__new(pid, tid); 445 th = thread__new(pid, tid);
425 if (th != NULL) { 446 if (th != NULL) {
426 rb_link_node(&th->rb_node, parent, p); 447 rb_link_node(&th->rb_node, parent, p);
427 rb_insert_color(&th->rb_node, &machine->threads); 448 rb_insert_color(&th->rb_node, &threads->entries);
428 449
429 /* 450 /*
430 * We have to initialize map_groups separately 451 * We have to initialize map_groups separately
@@ -435,7 +456,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
435 * leader and that would screwed the rb tree. 456 * leader and that would screwed the rb tree.
436 */ 457 */
437 if (thread__init_map_groups(th, machine)) { 458 if (thread__init_map_groups(th, machine)) {
438 rb_erase_init(&th->rb_node, &machine->threads); 459 rb_erase_init(&th->rb_node, &threads->entries);
439 RB_CLEAR_NODE(&th->rb_node); 460 RB_CLEAR_NODE(&th->rb_node);
440 thread__put(th); 461 thread__put(th);
441 return NULL; 462 return NULL;
@@ -444,8 +465,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
444 * It is now in the rbtree, get a ref 465 * It is now in the rbtree, get a ref
445 */ 466 */
446 thread__get(th); 467 thread__get(th);
447 machine->last_match = th; 468 threads->last_match = th;
448 ++machine->nr_threads; 469 ++threads->nr;
449 } 470 }
450 471
451 return th; 472 return th;
@@ -453,27 +474,30 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
453 474
454struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) 475struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
455{ 476{
456 return ____machine__findnew_thread(machine, pid, tid, true); 477 return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
457} 478}
458 479
459struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, 480struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
460 pid_t tid) 481 pid_t tid)
461{ 482{
483 struct threads *threads = machine__threads(machine, tid);
462 struct thread *th; 484 struct thread *th;
463 485
464 pthread_rwlock_wrlock(&machine->threads_lock); 486 down_write(&threads->lock);
465 th = __machine__findnew_thread(machine, pid, tid); 487 th = __machine__findnew_thread(machine, pid, tid);
466 pthread_rwlock_unlock(&machine->threads_lock); 488 up_write(&threads->lock);
467 return th; 489 return th;
468} 490}
469 491
470struct thread *machine__find_thread(struct machine *machine, pid_t pid, 492struct thread *machine__find_thread(struct machine *machine, pid_t pid,
471 pid_t tid) 493 pid_t tid)
472{ 494{
495 struct threads *threads = machine__threads(machine, tid);
473 struct thread *th; 496 struct thread *th;
474 pthread_rwlock_rdlock(&machine->threads_lock); 497
475 th = ____machine__findnew_thread(machine, pid, tid, false); 498 down_read(&threads->lock);
476 pthread_rwlock_unlock(&machine->threads_lock); 499 th = ____machine__findnew_thread(machine, threads, pid, tid, false);
500 up_read(&threads->lock);
477 return th; 501 return th;
478} 502}
479 503
@@ -565,7 +589,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
565{ 589{
566 struct dso *dso; 590 struct dso *dso;
567 591
568 pthread_rwlock_wrlock(&machine->dsos.lock); 592 down_write(&machine->dsos.lock);
569 593
570 dso = __dsos__find(&machine->dsos, m->name, true); 594 dso = __dsos__find(&machine->dsos, m->name, true);
571 if (!dso) { 595 if (!dso) {
@@ -579,7 +603,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
579 603
580 dso__get(dso); 604 dso__get(dso);
581out_unlock: 605out_unlock:
582 pthread_rwlock_unlock(&machine->dsos.lock); 606 up_write(&machine->dsos.lock);
583 return dso; 607 return dso;
584} 608}
585 609
@@ -720,21 +744,25 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
720 744
721size_t machine__fprintf(struct machine *machine, FILE *fp) 745size_t machine__fprintf(struct machine *machine, FILE *fp)
722{ 746{
723 size_t ret;
724 struct rb_node *nd; 747 struct rb_node *nd;
748 size_t ret;
749 int i;
725 750
726 pthread_rwlock_rdlock(&machine->threads_lock); 751 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
752 struct threads *threads = &machine->threads[i];
727 753
728 ret = fprintf(fp, "Threads: %u\n", machine->nr_threads); 754 down_read(&threads->lock);
729 755
730 for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { 756 ret = fprintf(fp, "Threads: %u\n", threads->nr);
731 struct thread *pos = rb_entry(nd, struct thread, rb_node);
732 757
733 ret += thread__fprintf(pos, fp); 758 for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
734 } 759 struct thread *pos = rb_entry(nd, struct thread, rb_node);
735 760
736 pthread_rwlock_unlock(&machine->threads_lock); 761 ret += thread__fprintf(pos, fp);
762 }
737 763
764 up_read(&threads->lock);
765 }
738 return ret; 766 return ret;
739} 767}
740 768
@@ -1293,7 +1321,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1293 struct dso *kernel = NULL; 1321 struct dso *kernel = NULL;
1294 struct dso *dso; 1322 struct dso *dso;
1295 1323
1296 pthread_rwlock_rdlock(&machine->dsos.lock); 1324 down_read(&machine->dsos.lock);
1297 1325
1298 list_for_each_entry(dso, &machine->dsos.head, node) { 1326 list_for_each_entry(dso, &machine->dsos.head, node) {
1299 1327
@@ -1323,7 +1351,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1323 break; 1351 break;
1324 } 1352 }
1325 1353
1326 pthread_rwlock_unlock(&machine->dsos.lock); 1354 up_read(&machine->dsos.lock);
1327 1355
1328 if (kernel == NULL) 1356 if (kernel == NULL)
1329 kernel = machine__findnew_dso(machine, kmmap_prefix); 1357 kernel = machine__findnew_dso(machine, kmmap_prefix);
@@ -1480,23 +1508,25 @@ out_problem:
1480 1508
1481static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock) 1509static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
1482{ 1510{
1483 if (machine->last_match == th) 1511 struct threads *threads = machine__threads(machine, th->tid);
1484 machine->last_match = NULL; 1512
1513 if (threads->last_match == th)
1514 threads->last_match = NULL;
1485 1515
1486 BUG_ON(refcount_read(&th->refcnt) == 0); 1516 BUG_ON(refcount_read(&th->refcnt) == 0);
1487 if (lock) 1517 if (lock)
1488 pthread_rwlock_wrlock(&machine->threads_lock); 1518 down_write(&threads->lock);
1489 rb_erase_init(&th->rb_node, &machine->threads); 1519 rb_erase_init(&th->rb_node, &threads->entries);
1490 RB_CLEAR_NODE(&th->rb_node); 1520 RB_CLEAR_NODE(&th->rb_node);
1491 --machine->nr_threads; 1521 --threads->nr;
1492 /* 1522 /*
1493 * Move it first to the dead_threads list, then drop the reference, 1523 * Move it first to the dead_threads list, then drop the reference,
1494 * if this is the last reference, then the thread__delete destructor 1524 * if this is the last reference, then the thread__delete destructor
1495 * will be called and we will remove it from the dead_threads list. 1525 * will be called and we will remove it from the dead_threads list.
1496 */ 1526 */
1497 list_add_tail(&th->node, &machine->dead_threads); 1527 list_add_tail(&th->node, &threads->dead);
1498 if (lock) 1528 if (lock)
1499 pthread_rwlock_unlock(&machine->threads_lock); 1529 up_write(&threads->lock);
1500 thread__put(th); 1530 thread__put(th);
1501} 1531}
1502 1532
@@ -1680,6 +1710,26 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
1680 return mi; 1710 return mi;
1681} 1711}
1682 1712
1713static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip)
1714{
1715 char *srcline = NULL;
1716
1717 if (!map || callchain_param.key == CCKEY_FUNCTION)
1718 return srcline;
1719
1720 srcline = srcline__tree_find(&map->dso->srclines, ip);
1721 if (!srcline) {
1722 bool show_sym = false;
1723 bool show_addr = callchain_param.key == CCKEY_ADDRESS;
1724
1725 srcline = get_srcline(map->dso, map__rip_2objdump(map, ip),
1726 sym, show_sym, show_addr);
1727 srcline__tree_insert(&map->dso->srclines, ip, srcline);
1728 }
1729
1730 return srcline;
1731}
1732
1683struct iterations { 1733struct iterations {
1684 int nr_loop_iter; 1734 int nr_loop_iter;
1685 u64 cycles; 1735 u64 cycles;
@@ -1699,6 +1749,7 @@ static int add_callchain_ip(struct thread *thread,
1699 struct addr_location al; 1749 struct addr_location al;
1700 int nr_loop_iter = 0; 1750 int nr_loop_iter = 0;
1701 u64 iter_cycles = 0; 1751 u64 iter_cycles = 0;
1752 const char *srcline = NULL;
1702 1753
1703 al.filtered = 0; 1754 al.filtered = 0;
1704 al.sym = NULL; 1755 al.sym = NULL;
@@ -1754,9 +1805,10 @@ static int add_callchain_ip(struct thread *thread,
1754 iter_cycles = iter->cycles; 1805 iter_cycles = iter->cycles;
1755 } 1806 }
1756 1807
1808 srcline = callchain_srcline(al.map, al.sym, al.addr);
1757 return callchain_cursor_append(cursor, al.addr, al.map, al.sym, 1809 return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
1758 branch, flags, nr_loop_iter, 1810 branch, flags, nr_loop_iter,
1759 iter_cycles, branch_from); 1811 iter_cycles, branch_from, srcline);
1760} 1812}
1761 1813
1762struct branch_info *sample__resolve_bstack(struct perf_sample *sample, 1814struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -2069,15 +2121,54 @@ check_calls:
2069 return 0; 2121 return 0;
2070} 2122}
2071 2123
2124static int append_inlines(struct callchain_cursor *cursor,
2125 struct map *map, struct symbol *sym, u64 ip)
2126{
2127 struct inline_node *inline_node;
2128 struct inline_list *ilist;
2129 u64 addr;
2130 int ret = 1;
2131
2132 if (!symbol_conf.inline_name || !map || !sym)
2133 return ret;
2134
2135 addr = map__rip_2objdump(map, ip);
2136
2137 inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
2138 if (!inline_node) {
2139 inline_node = dso__parse_addr_inlines(map->dso, addr, sym);
2140 if (!inline_node)
2141 return ret;
2142 inlines__tree_insert(&map->dso->inlined_nodes, inline_node);
2143 }
2144
2145 list_for_each_entry(ilist, &inline_node->val, list) {
2146 ret = callchain_cursor_append(cursor, ip, map,
2147 ilist->symbol, false,
2148 NULL, 0, 0, 0, ilist->srcline);
2149
2150 if (ret != 0)
2151 return ret;
2152 }
2153
2154 return ret;
2155}
2156
2072static int unwind_entry(struct unwind_entry *entry, void *arg) 2157static int unwind_entry(struct unwind_entry *entry, void *arg)
2073{ 2158{
2074 struct callchain_cursor *cursor = arg; 2159 struct callchain_cursor *cursor = arg;
2160 const char *srcline = NULL;
2075 2161
2076 if (symbol_conf.hide_unresolved && entry->sym == NULL) 2162 if (symbol_conf.hide_unresolved && entry->sym == NULL)
2077 return 0; 2163 return 0;
2164
2165 if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0)
2166 return 0;
2167
2168 srcline = callchain_srcline(entry->map, entry->sym, entry->ip);
2078 return callchain_cursor_append(cursor, entry->ip, 2169 return callchain_cursor_append(cursor, entry->ip,
2079 entry->map, entry->sym, 2170 entry->map, entry->sym,
2080 false, NULL, 0, 0, 0); 2171 false, NULL, 0, 0, 0, srcline);
2081} 2172}
2082 2173
2083static int thread__resolve_callchain_unwind(struct thread *thread, 2174static int thread__resolve_callchain_unwind(struct thread *thread,
@@ -2141,21 +2232,26 @@ int machine__for_each_thread(struct machine *machine,
2141 int (*fn)(struct thread *thread, void *p), 2232 int (*fn)(struct thread *thread, void *p),
2142 void *priv) 2233 void *priv)
2143{ 2234{
2235 struct threads *threads;
2144 struct rb_node *nd; 2236 struct rb_node *nd;
2145 struct thread *thread; 2237 struct thread *thread;
2146 int rc = 0; 2238 int rc = 0;
2239 int i;
2147 2240
2148 for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { 2241 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
2149 thread = rb_entry(nd, struct thread, rb_node); 2242 threads = &machine->threads[i];
2150 rc = fn(thread, priv); 2243 for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
2151 if (rc != 0) 2244 thread = rb_entry(nd, struct thread, rb_node);
2152 return rc; 2245 rc = fn(thread, priv);
2153 } 2246 if (rc != 0)
2247 return rc;
2248 }
2154 2249
2155 list_for_each_entry(thread, &machine->dead_threads, node) { 2250 list_for_each_entry(thread, &threads->dead, node) {
2156 rc = fn(thread, priv); 2251 rc = fn(thread, priv);
2157 if (rc != 0) 2252 if (rc != 0)
2158 return rc; 2253 return rc;
2254 }
2159 } 2255 }
2160 return rc; 2256 return rc;
2161} 2257}
@@ -2184,12 +2280,16 @@ int machines__for_each_thread(struct machines *machines,
2184int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, 2280int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
2185 struct target *target, struct thread_map *threads, 2281 struct target *target, struct thread_map *threads,
2186 perf_event__handler_t process, bool data_mmap, 2282 perf_event__handler_t process, bool data_mmap,
2187 unsigned int proc_map_timeout) 2283 unsigned int proc_map_timeout,
2284 unsigned int nr_threads_synthesize)
2188{ 2285{
2189 if (target__has_task(target)) 2286 if (target__has_task(target))
2190 return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout); 2287 return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
2191 else if (target__has_cpu(target)) 2288 else if (target__has_cpu(target))
2192 return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout); 2289 return perf_event__synthesize_threads(tool, process,
2290 machine, data_mmap,
2291 proc_map_timeout,
2292 nr_threads_synthesize);
2193 /* command specified */ 2293 /* command specified */
2194 return 0; 2294 return 0;
2195} 2295}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index d551aa80a59b..5ce860b64c74 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -7,6 +7,7 @@
7#include "map.h" 7#include "map.h"
8#include "dso.h" 8#include "dso.h"
9#include "event.h" 9#include "event.h"
10#include "rwsem.h"
10 11
11struct addr_location; 12struct addr_location;
12struct branch_stack; 13struct branch_stack;
@@ -24,6 +25,17 @@ extern const char *ref_reloc_sym_names[];
24 25
25struct vdso_info; 26struct vdso_info;
26 27
28#define THREADS__TABLE_BITS 8
29#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS)
30
31struct threads {
32 struct rb_root entries;
33 struct rw_semaphore lock;
34 unsigned int nr;
35 struct list_head dead;
36 struct thread *last_match;
37};
38
27struct machine { 39struct machine {
28 struct rb_node rb_node; 40 struct rb_node rb_node;
29 pid_t pid; 41 pid_t pid;
@@ -31,11 +43,7 @@ struct machine {
31 bool comm_exec; 43 bool comm_exec;
32 bool kptr_restrict_warned; 44 bool kptr_restrict_warned;
33 char *root_dir; 45 char *root_dir;
34 struct rb_root threads; 46 struct threads threads[THREADS__TABLE_SIZE];
35 pthread_rwlock_t threads_lock;
36 unsigned int nr_threads;
37 struct list_head dead_threads;
38 struct thread *last_match;
39 struct vdso_info *vdso_info; 47 struct vdso_info *vdso_info;
40 struct perf_env *env; 48 struct perf_env *env;
41 struct dsos dsos; 49 struct dsos dsos;
@@ -49,6 +57,12 @@ struct machine {
49 }; 57 };
50}; 58};
51 59
60static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
61{
62 /* Cast it to handle tid == -1 */
63 return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
64}
65
52static inline 66static inline
53struct map *__machine__kernel_map(struct machine *machine, enum map_type type) 67struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
54{ 68{
@@ -244,15 +258,18 @@ int machines__for_each_thread(struct machines *machines,
244int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, 258int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
245 struct target *target, struct thread_map *threads, 259 struct target *target, struct thread_map *threads,
246 perf_event__handler_t process, bool data_mmap, 260 perf_event__handler_t process, bool data_mmap,
247 unsigned int proc_map_timeout); 261 unsigned int proc_map_timeout,
262 unsigned int nr_threads_synthesize);
248static inline 263static inline
249int machine__synthesize_threads(struct machine *machine, struct target *target, 264int machine__synthesize_threads(struct machine *machine, struct target *target,
250 struct thread_map *threads, bool data_mmap, 265 struct thread_map *threads, bool data_mmap,
251 unsigned int proc_map_timeout) 266 unsigned int proc_map_timeout,
267 unsigned int nr_threads_synthesize)
252{ 268{
253 return __machine__synthesize_threads(machine, NULL, target, threads, 269 return __machine__synthesize_threads(machine, NULL, target, threads,
254 perf_event__process, data_mmap, 270 perf_event__process, data_mmap,
255 proc_map_timeout); 271 proc_map_timeout,
272 nr_threads_synthesize);
256} 273}
257 274
258pid_t machine__get_current_tid(struct machine *machine, int cpu); 275pid_t machine__get_current_tid(struct machine *machine, int cpu);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 4e7bd2750122..6d40efd74402 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -489,7 +489,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
489static void maps__init(struct maps *maps) 489static void maps__init(struct maps *maps)
490{ 490{
491 maps->entries = RB_ROOT; 491 maps->entries = RB_ROOT;
492 pthread_rwlock_init(&maps->lock, NULL); 492 init_rwsem(&maps->lock);
493} 493}
494 494
495void map_groups__init(struct map_groups *mg, struct machine *machine) 495void map_groups__init(struct map_groups *mg, struct machine *machine)
@@ -518,9 +518,9 @@ static void __maps__purge(struct maps *maps)
518 518
519static void maps__exit(struct maps *maps) 519static void maps__exit(struct maps *maps)
520{ 520{
521 pthread_rwlock_wrlock(&maps->lock); 521 down_write(&maps->lock);
522 __maps__purge(maps); 522 __maps__purge(maps);
523 pthread_rwlock_unlock(&maps->lock); 523 up_write(&maps->lock);
524} 524}
525 525
526void map_groups__exit(struct map_groups *mg) 526void map_groups__exit(struct map_groups *mg)
@@ -587,7 +587,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
587 struct symbol *sym; 587 struct symbol *sym;
588 struct rb_node *nd; 588 struct rb_node *nd;
589 589
590 pthread_rwlock_rdlock(&maps->lock); 590 down_read(&maps->lock);
591 591
592 for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { 592 for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
593 struct map *pos = rb_entry(nd, struct map, rb_node); 593 struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -603,7 +603,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
603 603
604 sym = NULL; 604 sym = NULL;
605out: 605out:
606 pthread_rwlock_unlock(&maps->lock); 606 up_read(&maps->lock);
607 return sym; 607 return sym;
608} 608}
609 609
@@ -639,7 +639,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
639 size_t printed = 0; 639 size_t printed = 0;
640 struct rb_node *nd; 640 struct rb_node *nd;
641 641
642 pthread_rwlock_rdlock(&maps->lock); 642 down_read(&maps->lock);
643 643
644 for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { 644 for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
645 struct map *pos = rb_entry(nd, struct map, rb_node); 645 struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -651,7 +651,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
651 } 651 }
652 } 652 }
653 653
654 pthread_rwlock_unlock(&maps->lock); 654 up_read(&maps->lock);
655 655
656 return printed; 656 return printed;
657} 657}
@@ -683,7 +683,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
683 struct rb_node *next; 683 struct rb_node *next;
684 int err = 0; 684 int err = 0;
685 685
686 pthread_rwlock_wrlock(&maps->lock); 686 down_write(&maps->lock);
687 687
688 root = &maps->entries; 688 root = &maps->entries;
689 next = rb_first(root); 689 next = rb_first(root);
@@ -751,7 +751,7 @@ put_map:
751 751
752 err = 0; 752 err = 0;
753out: 753out:
754 pthread_rwlock_unlock(&maps->lock); 754 up_write(&maps->lock);
755 return err; 755 return err;
756} 756}
757 757
@@ -772,7 +772,7 @@ int map_groups__clone(struct thread *thread,
772 struct map *map; 772 struct map *map;
773 struct maps *maps = &parent->maps[type]; 773 struct maps *maps = &parent->maps[type];
774 774
775 pthread_rwlock_rdlock(&maps->lock); 775 down_read(&maps->lock);
776 776
777 for (map = maps__first(maps); map; map = map__next(map)) { 777 for (map = maps__first(maps); map; map = map__next(map)) {
778 struct map *new = map__clone(map); 778 struct map *new = map__clone(map);
@@ -789,7 +789,7 @@ int map_groups__clone(struct thread *thread,
789 789
790 err = 0; 790 err = 0;
791out_unlock: 791out_unlock:
792 pthread_rwlock_unlock(&maps->lock); 792 up_read(&maps->lock);
793 return err; 793 return err;
794} 794}
795 795
@@ -816,9 +816,9 @@ static void __maps__insert(struct maps *maps, struct map *map)
816 816
817void maps__insert(struct maps *maps, struct map *map) 817void maps__insert(struct maps *maps, struct map *map)
818{ 818{
819 pthread_rwlock_wrlock(&maps->lock); 819 down_write(&maps->lock);
820 __maps__insert(maps, map); 820 __maps__insert(maps, map);
821 pthread_rwlock_unlock(&maps->lock); 821 up_write(&maps->lock);
822} 822}
823 823
824static void __maps__remove(struct maps *maps, struct map *map) 824static void __maps__remove(struct maps *maps, struct map *map)
@@ -829,9 +829,9 @@ static void __maps__remove(struct maps *maps, struct map *map)
829 829
830void maps__remove(struct maps *maps, struct map *map) 830void maps__remove(struct maps *maps, struct map *map)
831{ 831{
832 pthread_rwlock_wrlock(&maps->lock); 832 down_write(&maps->lock);
833 __maps__remove(maps, map); 833 __maps__remove(maps, map);
834 pthread_rwlock_unlock(&maps->lock); 834 up_write(&maps->lock);
835} 835}
836 836
837struct map *maps__find(struct maps *maps, u64 ip) 837struct map *maps__find(struct maps *maps, u64 ip)
@@ -839,7 +839,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
839 struct rb_node **p, *parent = NULL; 839 struct rb_node **p, *parent = NULL;
840 struct map *m; 840 struct map *m;
841 841
842 pthread_rwlock_rdlock(&maps->lock); 842 down_read(&maps->lock);
843 843
844 p = &maps->entries.rb_node; 844 p = &maps->entries.rb_node;
845 while (*p != NULL) { 845 while (*p != NULL) {
@@ -855,7 +855,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
855 855
856 m = NULL; 856 m = NULL;
857out: 857out:
858 pthread_rwlock_unlock(&maps->lock); 858 up_read(&maps->lock);
859 return m; 859 return m;
860} 860}
861 861
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 1fb9b8589adc..edeb7291c8e1 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -10,6 +10,7 @@
10#include <stdio.h> 10#include <stdio.h>
11#include <stdbool.h> 11#include <stdbool.h>
12#include <linux/types.h> 12#include <linux/types.h>
13#include "rwsem.h"
13 14
14enum map_type { 15enum map_type {
15 MAP__FUNCTION = 0, 16 MAP__FUNCTION = 0,
@@ -62,7 +63,7 @@ struct kmap {
62 63
63struct maps { 64struct maps {
64 struct rb_root entries; 65 struct rb_root entries;
65 pthread_rwlock_t lock; 66 struct rw_semaphore lock;
66}; 67};
67 68
68struct map_groups { 69struct map_groups {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
new file mode 100644
index 000000000000..0ddd9c199227
--- /dev/null
+++ b/tools/perf/util/metricgroup.c
@@ -0,0 +1,490 @@
1/*
2 * Copyright (c) 2017, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 */
14
15/* Manage metrics and groups of metrics from JSON files */
16
17#include "metricgroup.h"
18#include "evlist.h"
19#include "strbuf.h"
20#include "pmu.h"
21#include "expr.h"
22#include "rblist.h"
23#include "pmu.h"
24#include <string.h>
25#include <stdbool.h>
26#include <errno.h>
27#include "pmu-events/pmu-events.h"
28#include "strbuf.h"
29#include "strlist.h"
30#include <assert.h>
31#include <ctype.h>
32
33struct metric_event *metricgroup__lookup(struct rblist *metric_events,
34 struct perf_evsel *evsel,
35 bool create)
36{
37 struct rb_node *nd;
38 struct metric_event me = {
39 .evsel = evsel
40 };
41 nd = rblist__find(metric_events, &me);
42 if (nd)
43 return container_of(nd, struct metric_event, nd);
44 if (create) {
45 rblist__add_node(metric_events, &me);
46 nd = rblist__find(metric_events, &me);
47 if (nd)
48 return container_of(nd, struct metric_event, nd);
49 }
50 return NULL;
51}
52
53static int metric_event_cmp(struct rb_node *rb_node, const void *entry)
54{
55 struct metric_event *a = container_of(rb_node,
56 struct metric_event,
57 nd);
58 const struct metric_event *b = entry;
59
60 if (a->evsel == b->evsel)
61 return 0;
62 if ((char *)a->evsel < (char *)b->evsel)
63 return -1;
64 return +1;
65}
66
67static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
68 const void *entry)
69{
70 struct metric_event *me = malloc(sizeof(struct metric_event));
71
72 if (!me)
73 return NULL;
74 memcpy(me, entry, sizeof(struct metric_event));
75 me->evsel = ((struct metric_event *)entry)->evsel;
76 INIT_LIST_HEAD(&me->head);
77 return &me->nd;
78}
79
80static void metricgroup__rblist_init(struct rblist *metric_events)
81{
82 rblist__init(metric_events);
83 metric_events->node_cmp = metric_event_cmp;
84 metric_events->node_new = metric_event_new;
85}
86
87struct egroup {
88 struct list_head nd;
89 int idnum;
90 const char **ids;
91 const char *metric_name;
92 const char *metric_expr;
93};
94
95static struct perf_evsel *find_evsel(struct perf_evlist *perf_evlist,
96 const char **ids,
97 int idnum,
98 struct perf_evsel **metric_events)
99{
100 struct perf_evsel *ev, *start = NULL;
101 int ind = 0;
102
103 evlist__for_each_entry (perf_evlist, ev) {
104 if (!strcmp(ev->name, ids[ind])) {
105 metric_events[ind] = ev;
106 if (ind == 0)
107 start = ev;
108 if (++ind == idnum) {
109 metric_events[ind] = NULL;
110 return start;
111 }
112 } else {
113 ind = 0;
114 start = NULL;
115 }
116 }
117 /*
118 * This can happen when an alias expands to multiple
119 * events, like for uncore events.
120 * We don't support this case for now.
121 */
122 return NULL;
123}
124
125static int metricgroup__setup_events(struct list_head *groups,
126 struct perf_evlist *perf_evlist,
127 struct rblist *metric_events_list)
128{
129 struct metric_event *me;
130 struct metric_expr *expr;
131 int i = 0;
132 int ret = 0;
133 struct egroup *eg;
134 struct perf_evsel *evsel;
135
136 list_for_each_entry (eg, groups, nd) {
137 struct perf_evsel **metric_events;
138
139 metric_events = calloc(sizeof(void *), eg->idnum + 1);
140 if (!metric_events) {
141 ret = -ENOMEM;
142 break;
143 }
144 evsel = find_evsel(perf_evlist, eg->ids, eg->idnum,
145 metric_events);
146 if (!evsel) {
147 pr_debug("Cannot resolve %s: %s\n",
148 eg->metric_name, eg->metric_expr);
149 continue;
150 }
151 for (i = 0; i < eg->idnum; i++)
152 metric_events[i]->collect_stat = true;
153 me = metricgroup__lookup(metric_events_list, evsel, true);
154 if (!me) {
155 ret = -ENOMEM;
156 break;
157 }
158 expr = malloc(sizeof(struct metric_expr));
159 if (!expr) {
160 ret = -ENOMEM;
161 break;
162 }
163 expr->metric_expr = eg->metric_expr;
164 expr->metric_name = eg->metric_name;
165 expr->metric_events = metric_events;
166 list_add(&expr->nd, &me->head);
167 }
168 return ret;
169}
170
171static bool match_metric(const char *n, const char *list)
172{
173 int len;
174 char *m;
175
176 if (!list)
177 return false;
178 if (!strcmp(list, "all"))
179 return true;
180 if (!n)
181 return !strcasecmp(list, "No_group");
182 len = strlen(list);
183 m = strcasestr(n, list);
184 if (!m)
185 return false;
186 if ((m == n || m[-1] == ';' || m[-1] == ' ') &&
187 (m[len] == 0 || m[len] == ';'))
188 return true;
189 return false;
190}
191
192struct mep {
193 struct rb_node nd;
194 const char *name;
195 struct strlist *metrics;
196};
197
198static int mep_cmp(struct rb_node *rb_node, const void *entry)
199{
200 struct mep *a = container_of(rb_node, struct mep, nd);
201 struct mep *b = (struct mep *)entry;
202
203 return strcmp(a->name, b->name);
204}
205
206static struct rb_node *mep_new(struct rblist *rl __maybe_unused,
207 const void *entry)
208{
209 struct mep *me = malloc(sizeof(struct mep));
210
211 if (!me)
212 return NULL;
213 memcpy(me, entry, sizeof(struct mep));
214 me->name = strdup(me->name);
215 if (!me->name)
216 goto out_me;
217 me->metrics = strlist__new(NULL, NULL);
218 if (!me->metrics)
219 goto out_name;
220 return &me->nd;
221out_name:
222 free((char *)me->name);
223out_me:
224 free(me);
225 return NULL;
226}
227
228static struct mep *mep_lookup(struct rblist *groups, const char *name)
229{
230 struct rb_node *nd;
231 struct mep me = {
232 .name = name
233 };
234 nd = rblist__find(groups, &me);
235 if (nd)
236 return container_of(nd, struct mep, nd);
237 rblist__add_node(groups, &me);
238 nd = rblist__find(groups, &me);
239 if (nd)
240 return container_of(nd, struct mep, nd);
241 return NULL;
242}
243
244static void mep_delete(struct rblist *rl __maybe_unused,
245 struct rb_node *nd)
246{
247 struct mep *me = container_of(nd, struct mep, nd);
248
249 strlist__delete(me->metrics);
250 free((void *)me->name);
251 free(me);
252}
253
254static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
255{
256 struct str_node *sn;
257 int n = 0;
258
259 strlist__for_each_entry (sn, metrics) {
260 if (raw)
261 printf("%s%s", n > 0 ? " " : "", sn->s);
262 else
263 printf(" %s\n", sn->s);
264 n++;
265 }
266 if (raw)
267 putchar('\n');
268}
269
270void metricgroup__print(bool metrics, bool metricgroups, char *filter,
271 bool raw)
272{
273 struct pmu_events_map *map = perf_pmu__find_map();
274 struct pmu_event *pe;
275 int i;
276 struct rblist groups;
277 struct rb_node *node, *next;
278 struct strlist *metriclist = NULL;
279
280 if (!map)
281 return;
282
283 if (!metricgroups) {
284 metriclist = strlist__new(NULL, NULL);
285 if (!metriclist)
286 return;
287 }
288
289 rblist__init(&groups);
290 groups.node_new = mep_new;
291 groups.node_cmp = mep_cmp;
292 groups.node_delete = mep_delete;
293 for (i = 0; ; i++) {
294 const char *g;
295 pe = &map->table[i];
296
297 if (!pe->name && !pe->metric_group && !pe->metric_name)
298 break;
299 if (!pe->metric_expr)
300 continue;
301 g = pe->metric_group;
302 if (!g && pe->metric_name) {
303 if (pe->name)
304 continue;
305 g = "No_group";
306 }
307 if (g) {
308 char *omg;
309 char *mg = strdup(g);
310
311 if (!mg)
312 return;
313 omg = mg;
314 while ((g = strsep(&mg, ";")) != NULL) {
315 struct mep *me;
316 char *s;
317
318 if (*g == 0)
319 g = "No_group";
320 while (isspace(*g))
321 g++;
322 if (filter && !strstr(g, filter))
323 continue;
324 if (raw)
325 s = (char *)pe->metric_name;
326 else {
327 if (asprintf(&s, "%s\n\t[%s]",
328 pe->metric_name, pe->desc) < 0)
329 return;
330 }
331
332 if (!s)
333 continue;
334
335 if (!metricgroups) {
336 strlist__add(metriclist, s);
337 } else {
338 me = mep_lookup(&groups, g);
339 if (!me)
340 continue;
341 strlist__add(me->metrics, s);
342 }
343 }
344 free(omg);
345 }
346 }
347
348 if (metricgroups && !raw)
349 printf("\nMetric Groups:\n\n");
350 else if (metrics && !raw)
351 printf("\nMetrics:\n\n");
352
353 for (node = rb_first(&groups.entries); node; node = next) {
354 struct mep *me = container_of(node, struct mep, nd);
355
356 if (metricgroups)
357 printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n");
358 if (metrics)
359 metricgroup__print_strlist(me->metrics, raw);
360 next = rb_next(node);
361 rblist__remove_node(&groups, node);
362 }
363 if (!metricgroups)
364 metricgroup__print_strlist(metriclist, raw);
365 strlist__delete(metriclist);
366}
367
368static int metricgroup__add_metric(const char *metric, struct strbuf *events,
369 struct list_head *group_list)
370{
371 struct pmu_events_map *map = perf_pmu__find_map();
372 struct pmu_event *pe;
373 int ret = -EINVAL;
374 int i, j;
375
376 if (!map)
377 return 0;
378
379 for (i = 0; ; i++) {
380 pe = &map->table[i];
381
382 if (!pe->name && !pe->metric_group && !pe->metric_name)
383 break;
384 if (!pe->metric_expr)
385 continue;
386 if (match_metric(pe->metric_group, metric) ||
387 match_metric(pe->metric_name, metric)) {
388 const char **ids;
389 int idnum;
390 struct egroup *eg;
391
392 pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
393
394 if (expr__find_other(pe->metric_expr,
395 NULL, &ids, &idnum) < 0)
396 continue;
397 if (events->len > 0)
398 strbuf_addf(events, ",");
399 for (j = 0; j < idnum; j++) {
400 pr_debug("found event %s\n", ids[j]);
401 strbuf_addf(events, "%s%s",
402 j == 0 ? "{" : ",",
403 ids[j]);
404 }
405 strbuf_addf(events, "}:W");
406
407 eg = malloc(sizeof(struct egroup));
408 if (!eg) {
409 ret = -ENOMEM;
410 break;
411 }
412 eg->ids = ids;
413 eg->idnum = idnum;
414 eg->metric_name = pe->metric_name;
415 eg->metric_expr = pe->metric_expr;
416 list_add_tail(&eg->nd, group_list);
417 ret = 0;
418 }
419 }
420 return ret;
421}
422
423static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
424 struct list_head *group_list)
425{
426 char *llist, *nlist, *p;
427 int ret = -EINVAL;
428
429 nlist = strdup(list);
430 if (!nlist)
431 return -ENOMEM;
432 llist = nlist;
433
434 strbuf_init(events, 100);
435 strbuf_addf(events, "%s", "");
436
437 while ((p = strsep(&llist, ",")) != NULL) {
438 ret = metricgroup__add_metric(p, events, group_list);
439 if (ret == -EINVAL) {
440 fprintf(stderr, "Cannot find metric or group `%s'\n",
441 p);
442 break;
443 }
444 }
445 free(nlist);
446 return ret;
447}
448
449static void metricgroup__free_egroups(struct list_head *group_list)
450{
451 struct egroup *eg, *egtmp;
452 int i;
453
454 list_for_each_entry_safe (eg, egtmp, group_list, nd) {
455 for (i = 0; i < eg->idnum; i++)
456 free((char *)eg->ids[i]);
457 free(eg->ids);
458 free(eg);
459 }
460}
461
462int metricgroup__parse_groups(const struct option *opt,
463 const char *str,
464 struct rblist *metric_events)
465{
466 struct parse_events_error parse_error;
467 struct perf_evlist *perf_evlist = *(struct perf_evlist **)opt->value;
468 struct strbuf extra_events;
469 LIST_HEAD(group_list);
470 int ret;
471
472 if (metric_events->nr_entries == 0)
473 metricgroup__rblist_init(metric_events);
474 ret = metricgroup__add_metric_list(str, &extra_events, &group_list);
475 if (ret)
476 return ret;
477 pr_debug("adding %s\n", extra_events.buf);
478 memset(&parse_error, 0, sizeof(struct parse_events_error));
479 ret = parse_events(perf_evlist, extra_events.buf, &parse_error);
480 if (ret) {
481 parse_events_print_error(&parse_error, extra_events.buf);
482 goto out;
483 }
484 strbuf_release(&extra_events);
485 ret = metricgroup__setup_events(&group_list, perf_evlist,
486 metric_events);
487out:
488 metricgroup__free_egroups(&group_list);
489 return ret;
490}
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
new file mode 100644
index 000000000000..06854e125ee7
--- /dev/null
+++ b/tools/perf/util/metricgroup.h
@@ -0,0 +1,31 @@
1#ifndef METRICGROUP_H
2#define METRICGROUP_H 1
3
4#include "linux/list.h"
5#include "rblist.h"
6#include <subcmd/parse-options.h>
7#include "evlist.h"
8#include "strbuf.h"
9
10struct metric_event {
11 struct rb_node nd;
12 struct perf_evsel *evsel;
13 struct list_head head; /* list of metric_expr */
14};
15
16struct metric_expr {
17 struct list_head nd;
18 const char *metric_expr;
19 const char *metric_name;
20 struct perf_evsel **metric_events;
21};
22
23struct metric_event *metricgroup__lookup(struct rblist *metric_events,
24 struct perf_evsel *evsel,
25 bool create);
26int metricgroup__parse_groups(const struct option *opt,
27 const char *str,
28 struct rblist *metric_events);
29
30void metricgroup__print(bool metrics, bool groups, char *filter, bool raw);
31#endif
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
new file mode 100644
index 000000000000..9fe5f9c7d577
--- /dev/null
+++ b/tools/perf/util/mmap.c
@@ -0,0 +1,352 @@
1/*
2 * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9
10#include <sys/mman.h>
11#include <inttypes.h>
12#include <asm/bug.h>
13#include "debug.h"
14#include "event.h"
15#include "mmap.h"
16#include "util.h" /* page_size */
17
/*
 * Total length of the mmap'ed area: the data pages (mask + 1 bytes, a
 * power of two) plus one leading page holding struct perf_event_mmap_page.
 */
size_t perf_mmap__mmap_len(struct perf_mmap *map)
{
	return map->mask + 1 + page_size;
}
22
/*
 * Read one event from the ring buffer area between 'start' and 'end',
 * storing the position just past the returned record in *prev.
 * When check_messup is true, 'end' must point to a good entry.
 */
static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup,
					 u64 start, u64 end, u64 *prev)
{
	unsigned char *data = map->base + page_size;	/* data follows the header page */
	union perf_event *event = NULL;
	int diff = end - start;

	if (check_messup) {
		/*
		 * If we're further behind than half the buffer, there's a chance
		 * the writer will bite our tail and mess up the samples under us.
		 *
		 * If we somehow ended up ahead of the 'end', we got messed up.
		 *
		 * In either case, truncate and restart at 'end'.
		 */
		if (diff > map->mask / 2 || diff < 0) {
			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

			/*
			 * 'end' points to a known good entry, start there.
			 */
			start = end;
			diff = 0;
		}
	}

	if (diff >= (int)sizeof(event->header)) {
		size_t size;

		event = (union perf_event *)&data[start & map->mask];
		size = event->header.size;

		/* Corrupt header or record extends past 'end': bail out. */
		if (size < sizeof(event->header) || diff < (int)size) {
			event = NULL;
			goto broken_event;
		}

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((start & map->mask) + size != ((start + size) & map->mask)) {
			unsigned int offset = start;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = map->event_copy;

			/* Reassemble the wrapped record into event_copy. */
			do {
				cpy = min(map->mask + 1 - (offset & map->mask), len);
				memcpy(dst, &data[offset & map->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *)map->event_copy;
		}

		start += size;
	}

broken_event:
	if (prev)
		*prev = start;

	return event;
}
91
92union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup)
93{
94 u64 head;
95 u64 old = map->prev;
96
97 /*
98 * Check if event was unmapped due to a POLLHUP/POLLERR.
99 */
100 if (!refcount_read(&map->refcnt))
101 return NULL;
102
103 head = perf_mmap__read_head(map);
104
105 return perf_mmap__read(map, check_messup, old, head, &map->prev);
106}
107
/*
 * Fetch the next event from an overwrite (backward-writing) ring buffer.
 * map->prev tracks the read position; returns NULL when the buffer was
 * unmapped, never written to, or exhausted.
 */
union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
{
	u64 head, end;
	u64 start = map->prev;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return NULL;

	head = perf_mmap__read_head(map);
	if (!head)
		return NULL;

	/*
	 * 'head' pointer starts from 0. Kernel minus sizeof(record) form
	 * it each time when kernel writes to it, so in fact 'head' is
	 * negative. 'end' pointer is made manually by adding the size of
	 * the ring buffer to 'head' pointer, means the validate data can
	 * read is the whole ring buffer. If 'end' is positive, the ring
	 * buffer has not fully filled, so we must adjust 'end' to 0.
	 *
	 * However, since both 'head' and 'end' is unsigned, we can't
	 * simply compare 'end' against 0. Here we compare '-head' and
	 * the size of the ring buffer, where -head is the number of bytes
	 * kernel write to the ring buffer.
	 */
	if (-head < (u64)(map->mask + 1))
		end = 0;
	else
		end = head + map->mask + 1;

	/* check_messup is false: overwrite buffers have no tail to trample. */
	return perf_mmap__read(map, false, start, end, &map->prev);
}
143
144void perf_mmap__read_catchup(struct perf_mmap *map)
145{
146 u64 head;
147
148 if (!refcount_read(&map->refcnt))
149 return;
150
151 head = perf_mmap__read_head(map);
152 map->prev = head;
153}
154
155static bool perf_mmap__empty(struct perf_mmap *map)
156{
157 return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base;
158}
159
/* Take a reference on the mmap'ed ring buffer. */
void perf_mmap__get(struct perf_mmap *map)
{
	refcount_inc(&map->refcnt);
}
164
/* Drop a reference; unmaps the ring buffer when the last one goes away. */
void perf_mmap__put(struct perf_mmap *map)
{
	/* A still-mapped buffer must never have a zero refcount. */
	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);

	if (refcount_dec_and_test(&map->refcnt))
		perf_mmap__munmap(map);
}
172
173void perf_mmap__consume(struct perf_mmap *map, bool overwrite)
174{
175 if (!overwrite) {
176 u64 old = map->prev;
177
178 perf_mmap__write_tail(map, old);
179 }
180
181 if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
182 perf_mmap__put(map);
183}
184
/* Weak default when auxtrace support is not built in: report success. */
int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
			       struct auxtrace_mmap_params *mp __maybe_unused,
			       void *userpg __maybe_unused,
			       int fd __maybe_unused)
{
	return 0;
}
192
/* Weak default: nothing to unmap without auxtrace support. */
void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}
196
/* Weak default: auxtrace mmap parameters are ignored without support. */
void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused,
				       off_t auxtrace_offset __maybe_unused,
				       unsigned int auxtrace_pages __maybe_unused,
				       bool auxtrace_overwrite __maybe_unused)
{
}
203
/* Weak default: no per-mmap auxtrace index to set without support. */
void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
					  struct perf_evlist *evlist __maybe_unused,
					  int idx __maybe_unused,
					  bool per_cpu __maybe_unused)
{
}
210
211void perf_mmap__munmap(struct perf_mmap *map)
212{
213 if (map->base != NULL) {
214 munmap(map->base, perf_mmap__mmap_len(map));
215 map->base = NULL;
216 map->fd = -1;
217 refcount_set(&map->refcnt, 0);
218 }
219 auxtrace_mmap__munmap(&map->auxtrace_mmap);
220}
221
/*
 * Map a perf event ring buffer (control page + data pages) for 'fd' and
 * set up its auxtrace area.  Returns 0 on success, -1 on failure.
 */
int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
{
	/*
	 * The last one will be done at perf_evlist__mmap_consume(), so that we
	 * make sure we don't prevent tools from consuming every last event in
	 * the ring buffer.
	 *
	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
	 * anymore, but the last events for it are still in the ring buffer,
	 * waiting to be consumed.
	 *
	 * Tools can chose to ignore this at their own discretion, but the
	 * evlist layer can't just drop it when filtering events in
	 * perf_evlist__filter_pollfd().
	 */
	refcount_set(&map->refcnt, 2);
	map->prev = 0;
	map->mask = mp->mask;
	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
			 MAP_SHARED, fd, 0);
	if (map->base == MAP_FAILED) {
		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
			  errno);
		map->base = NULL;
		return -1;
	}
	map->fd = fd;

	/*
	 * NOTE(review): if auxtrace_mmap__mmap() fails, the ring buffer
	 * mapped above stays mapped here -- presumably the caller unwinds
	 * via perf_mmap__munmap(); verify.
	 */
	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
				&mp->auxtrace_mp, map->base, fd))
		return -1;

	return 0;
}
256
/*
 * Determine the readable [*start, *end) range of a backward ring buffer.
 * 'head' is where the kernel stopped writing; records are walked forward
 * from there until either a full lap of the buffer has been covered or a
 * zero-sized header marks the end of valid data.  Returns 0 on success.
 */
static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = head;
	int size = mask + 1;

	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
	pheader = (struct perf_event_header *)(buf + (head & mask));
	*start = head;
	while (true) {
		/* Walked one full lap: the whole buffer is valid data. */
		if (evt_head - head >= (unsigned int)size) {
			pr_debug("Finished reading backward ring buffer: rewind\n");
			/* Step back so *end lands on a record boundary. */
			if (evt_head - head > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		/* A zero-sized header means no record was written here. */
		if (pheader->size == 0) {
			pr_debug("Finished reading backward ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}
289
290static int rb_find_range(void *data, int mask, u64 head, u64 old,
291 u64 *start, u64 *end, bool backward)
292{
293 if (!backward) {
294 *start = old;
295 *end = head;
296 return 0;
297 }
298
299 return backward_rb_find_range(data, mask, head, start, end);
300}
301
/*
 * Drain the unread part of the ring buffer through the push() callback
 * (e.g. writing it to a perf.data file); 'to' is passed through to
 * push().  Returns 0 on success (including nothing to do), -1 on error.
 */
int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
		    void *to, int push(void *to, void *buf, size_t size))
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	u64 end = head, start = old;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (rb_find_range(data, md->mask, head, old, &start, &end, backward))
		return -1;

	/* Nothing new since the last push. */
	if (start == end)
		return 0;

	size = end - start;
	if (size > (unsigned long)(md->mask) + 1) {
		/* The writer lapped us: drop this chunk and resynchronize. */
		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

		md->prev = head;
		perf_mmap__consume(md, overwrite || backward);
		return 0;
	}

	/* Range wraps around the buffer end: push the tail part first. */
	if ((start & md->mask) + size != (end & md->mask)) {
		buf = &data[start & md->mask];
		size = md->mask + 1 - (start & md->mask);
		start += size;

		if (push(to, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* Push the (remaining) contiguous part. */
	buf = &data[start & md->mask];
	size = end - start;
	start += size;

	if (push(to, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = head;
	perf_mmap__consume(md, overwrite || backward);
out:
	return rc;
}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
new file mode 100644
index 000000000000..efd78b827b05
--- /dev/null
+++ b/tools/perf/util/mmap.h
@@ -0,0 +1,97 @@
1#ifndef __PERF_MMAP_H
2#define __PERF_MMAP_H 1
3
4#include <linux/compiler.h>
5#include <linux/refcount.h>
6#include <linux/types.h>
7#include <asm/barrier.h>
8#include <stdbool.h>
9#include "auxtrace.h"
10#include "event.h"
11
12/**
13 * struct perf_mmap - perf's ring buffer mmap details
14 *
15 * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
16 */
17struct perf_mmap {
18 void *base;
19 int mask;
20 int fd;
21 refcount_t refcnt;
22 u64 prev;
23 struct auxtrace_mmap auxtrace_mmap;
24 char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
25};
26
27/*
28 * State machine of bkw_mmap_state:
29 *
30 * .________________(forbid)_____________.
31 * | V
32 * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
33 * ^ ^ | ^ |
34 * | |__(forbid)____/ |___(forbid)___/|
35 * | |
36 * \_________________(3)_______________/
37 *
38 * NOTREADY : Backward ring buffers are not ready
39 * RUNNING : Backward ring buffers are recording
40 * DATA_PENDING : We are required to collect data from backward ring buffers
41 * EMPTY : We have collected data from backward ring buffers.
42 *
43 * (0): Setup backward ring buffer
44 * (1): Pause ring buffers for reading
45 * (2): Read from ring buffers
46 * (3): Resume ring buffers for recording
47 */
48enum bkw_mmap_state {
49 BKW_MMAP_NOTREADY,
50 BKW_MMAP_RUNNING,
51 BKW_MMAP_DATA_PENDING,
52 BKW_MMAP_EMPTY,
53};
54
55struct mmap_params {
56 int prot, mask;
57 struct auxtrace_mmap_params auxtrace_mp;
58};
59
60int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd);
61void perf_mmap__munmap(struct perf_mmap *map);
62
63void perf_mmap__get(struct perf_mmap *map);
64void perf_mmap__put(struct perf_mmap *map);
65
66void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
67
68void perf_mmap__read_catchup(struct perf_mmap *md);
69
/*
 * Read the kernel's current write offset from the control page.  The
 * rmb() orders the data_head load before subsequent loads from the data
 * area, so we never read ring-buffer bytes beyond what 'head' covers.
 */
static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
{
	struct perf_event_mmap_page *pc = mm->base;
	u64 head = ACCESS_ONCE(pc->data_head);
	rmb();
	return head;
}
77
/*
 * Publish the consumer position: everything up to 'tail' has been read
 * and the kernel may overwrite it.
 */
static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
	struct perf_event_mmap_page *pc = md->base;

	/*
	 * ensure all reads are done before we write the tail out.
	 */
	mb();
	pc->data_tail = tail;
}
88
89union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup);
90union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
91
92int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
93 void *to, int push(void *to, void *buf, size_t size));
94
95size_t perf_mmap__mmap_len(struct perf_mmap *map);
96
97#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index a58e91197729..5be021701f34 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -11,6 +11,7 @@
11#include "event.h" 11#include "event.h"
12#include <sys/types.h> 12#include <sys/types.h>
13#include <sys/stat.h> 13#include <sys/stat.h>
14#include <fcntl.h>
14#include <limits.h> 15#include <limits.h>
15#include <sched.h> 16#include <sched.h>
16#include <stdlib.h> 17#include <stdlib.h>
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index 05d82601c9a6..760558dcfd18 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -9,9 +9,10 @@
9#ifndef __PERF_NAMESPACES_H 9#ifndef __PERF_NAMESPACES_H
10#define __PERF_NAMESPACES_H 10#define __PERF_NAMESPACES_H
11 11
12#include "../perf.h" 12#include <sys/types.h>
13#include <linux/list.h> 13#include <linux/perf_event.h>
14#include <linux/refcount.h> 14#include <linux/refcount.h>
15#include <linux/types.h>
15 16
16struct namespaces_event; 17struct namespaces_event;
17 18
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 56694e3409ea..a7fcd95961ef 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -29,6 +29,7 @@
29#include "probe-file.h" 29#include "probe-file.h"
30#include "asm/bug.h" 30#include "asm/bug.h"
31#include "util/parse-branch-options.h" 31#include "util/parse-branch-options.h"
32#include "metricgroup.h"
32 33
33#define MAX_NAME_LEN 100 34#define MAX_NAME_LEN 100
34 35
@@ -1220,11 +1221,17 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
1220 struct perf_pmu_info info; 1221 struct perf_pmu_info info;
1221 struct perf_pmu *pmu; 1222 struct perf_pmu *pmu;
1222 struct perf_evsel *evsel; 1223 struct perf_evsel *evsel;
1224 struct parse_events_error *err = parse_state->error;
1223 LIST_HEAD(config_terms); 1225 LIST_HEAD(config_terms);
1224 1226
1225 pmu = perf_pmu__find(name); 1227 pmu = perf_pmu__find(name);
1226 if (!pmu) 1228 if (!pmu) {
1229 if (asprintf(&err->str,
1230 "Cannot find PMU `%s'. Missing kernel support?",
1231 name) < 0)
1232 err->str = NULL;
1227 return -EINVAL; 1233 return -EINVAL;
1234 }
1228 1235
1229 if (pmu->default_config) { 1236 if (pmu->default_config) {
1230 memcpy(&attr, pmu->default_config, 1237 memcpy(&attr, pmu->default_config,
@@ -1368,6 +1375,7 @@ struct event_modifier {
1368 int exclude_GH; 1375 int exclude_GH;
1369 int sample_read; 1376 int sample_read;
1370 int pinned; 1377 int pinned;
1378 int weak;
1371}; 1379};
1372 1380
1373static int get_event_modifier(struct event_modifier *mod, char *str, 1381static int get_event_modifier(struct event_modifier *mod, char *str,
@@ -1386,6 +1394,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
1386 1394
1387 int exclude = eu | ek | eh; 1395 int exclude = eu | ek | eh;
1388 int exclude_GH = evsel ? evsel->exclude_GH : 0; 1396 int exclude_GH = evsel ? evsel->exclude_GH : 0;
1397 int weak = 0;
1389 1398
1390 memset(mod, 0, sizeof(*mod)); 1399 memset(mod, 0, sizeof(*mod));
1391 1400
@@ -1423,6 +1432,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
1423 sample_read = 1; 1432 sample_read = 1;
1424 } else if (*str == 'D') { 1433 } else if (*str == 'D') {
1425 pinned = 1; 1434 pinned = 1;
1435 } else if (*str == 'W') {
1436 weak = 1;
1426 } else 1437 } else
1427 break; 1438 break;
1428 1439
@@ -1453,6 +1464,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
1453 mod->exclude_GH = exclude_GH; 1464 mod->exclude_GH = exclude_GH;
1454 mod->sample_read = sample_read; 1465 mod->sample_read = sample_read;
1455 mod->pinned = pinned; 1466 mod->pinned = pinned;
1467 mod->weak = weak;
1456 1468
1457 return 0; 1469 return 0;
1458} 1470}
@@ -1466,7 +1478,7 @@ static int check_modifier(char *str)
1466 char *p = str; 1478 char *p = str;
1467 1479
1468 /* The sizeof includes 0 byte as well. */ 1480 /* The sizeof includes 0 byte as well. */
1469 if (strlen(str) > (sizeof("ukhGHpppPSDI") - 1)) 1481 if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1))
1470 return -1; 1482 return -1;
1471 1483
1472 while (*p) { 1484 while (*p) {
@@ -1506,6 +1518,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
1506 evsel->exclude_GH = mod.exclude_GH; 1518 evsel->exclude_GH = mod.exclude_GH;
1507 evsel->sample_read = mod.sample_read; 1519 evsel->sample_read = mod.sample_read;
1508 evsel->precise_max = mod.precise_max; 1520 evsel->precise_max = mod.precise_max;
1521 evsel->weak_group = mod.weak;
1509 1522
1510 if (perf_evsel__is_group_leader(evsel)) 1523 if (perf_evsel__is_group_leader(evsel))
1511 evsel->attr.pinned = mod.pinned; 1524 evsel->attr.pinned = mod.pinned;
@@ -1728,8 +1741,8 @@ static int get_term_width(void)
1728 return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col; 1741 return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col;
1729} 1742}
1730 1743
1731static void parse_events_print_error(struct parse_events_error *err, 1744void parse_events_print_error(struct parse_events_error *err,
1732 const char *event) 1745 const char *event)
1733{ 1746{
1734 const char *str = "invalid or unsupported event: "; 1747 const char *str = "invalid or unsupported event: ";
1735 char _buf[MAX_WIDTH]; 1748 char _buf[MAX_WIDTH];
@@ -1784,8 +1797,6 @@ static void parse_events_print_error(struct parse_events_error *err,
1784 zfree(&err->str); 1797 zfree(&err->str);
1785 zfree(&err->help); 1798 zfree(&err->help);
1786 } 1799 }
1787
1788 fprintf(stderr, "Run 'perf list' for a list of valid events\n");
1789} 1800}
1790 1801
1791#undef MAX_WIDTH 1802#undef MAX_WIDTH
@@ -1797,8 +1808,10 @@ int parse_events_option(const struct option *opt, const char *str,
1797 struct parse_events_error err = { .idx = 0, }; 1808 struct parse_events_error err = { .idx = 0, };
1798 int ret = parse_events(evlist, str, &err); 1809 int ret = parse_events(evlist, str, &err);
1799 1810
1800 if (ret) 1811 if (ret) {
1801 parse_events_print_error(&err, str); 1812 parse_events_print_error(&err, str);
1813 fprintf(stderr, "Run 'perf list' for a list of valid events\n");
1814 }
1802 1815
1803 return ret; 1816 return ret;
1804} 1817}
@@ -2376,6 +2389,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag,
2376 print_tracepoint_events(NULL, NULL, name_only); 2389 print_tracepoint_events(NULL, NULL, name_only);
2377 2390
2378 print_sdt_events(NULL, NULL, name_only); 2391 print_sdt_events(NULL, NULL, name_only);
2392
2393 metricgroup__print(true, true, NULL, name_only);
2379} 2394}
2380 2395
2381int parse_events__is_hardcoded_term(struct parse_events_term *term) 2396int parse_events__is_hardcoded_term(struct parse_events_term *term)
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index eed50b54bab3..be337c266697 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -203,6 +203,9 @@ int is_valid_tracepoint(const char *event_string);
203int valid_event_mount(const char *eventfs); 203int valid_event_mount(const char *eventfs);
204char *parse_events_formats_error_string(char *additional_terms); 204char *parse_events_formats_error_string(char *additional_terms);
205 205
206void parse_events_print_error(struct parse_events_error *err,
207 const char *event);
208
206#ifdef HAVE_LIBELF_SUPPORT 209#ifdef HAVE_LIBELF_SUPPORT
207/* 210/*
208 * If the probe point starts with '%', 211 * If the probe point starts with '%',
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 025729510525..655ecff636a8 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -179,7 +179,7 @@ name [a-zA-Z_*?][a-zA-Z0-9_*?.]*
179name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* 179name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
180drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? 180drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
181/* If you add a modifier you need to update check_modifier() */ 181/* If you add a modifier you need to update check_modifier() */
182modifier_event [ukhpPGHSDI]+ 182modifier_event [ukhpPGHSDIW]+
183modifier_bp [rwx]{1,3} 183modifier_bp [rwx]{1,3}
184 184
185%% 185%%
@@ -306,6 +306,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU
306alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } 306alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
307emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } 307emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
308dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } 308dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
309duration_time { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
309bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } 310bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
310 311
311 /* 312 /*
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index b10b35a63138..07cb2ac041d7 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -3,6 +3,7 @@
3#include <linux/compiler.h> 3#include <linux/compiler.h>
4#include <sys/types.h> 4#include <sys/types.h>
5#include <errno.h> 5#include <errno.h>
6#include <fcntl.h>
6#include <sys/stat.h> 7#include <sys/stat.h>
7#include <unistd.h> 8#include <unistd.h>
8#include <stdio.h> 9#include <stdio.h>
@@ -541,16 +542,8 @@ char * __weak get_cpuid_str(void)
541 return NULL; 542 return NULL;
542} 543}
543 544
544/* 545static char *perf_pmu__getcpuid(void)
545 * From the pmu_events_map, find the table of PMU events that corresponds
546 * to the current running CPU. Then, add all PMU events from that table
547 * as aliases.
548 */
549static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
550{ 546{
551 int i;
552 struct pmu_events_map *map;
553 struct pmu_event *pe;
554 char *cpuid; 547 char *cpuid;
555 static bool printed; 548 static bool printed;
556 549
@@ -560,22 +553,50 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
560 if (!cpuid) 553 if (!cpuid)
561 cpuid = get_cpuid_str(); 554 cpuid = get_cpuid_str();
562 if (!cpuid) 555 if (!cpuid)
563 return; 556 return NULL;
564 557
565 if (!printed) { 558 if (!printed) {
566 pr_debug("Using CPUID %s\n", cpuid); 559 pr_debug("Using CPUID %s\n", cpuid);
567 printed = true; 560 printed = true;
568 } 561 }
562 return cpuid;
563}
564
565struct pmu_events_map *perf_pmu__find_map(void)
566{
567 struct pmu_events_map *map;
568 char *cpuid = perf_pmu__getcpuid();
569 int i;
569 570
570 i = 0; 571 i = 0;
571 while (1) { 572 for (;;) {
572 map = &pmu_events_map[i++]; 573 map = &pmu_events_map[i++];
573 if (!map->table) 574 if (!map->table) {
574 goto out; 575 map = NULL;
576 break;
577 }
575 578
576 if (!strcmp(map->cpuid, cpuid)) 579 if (!strcmp(map->cpuid, cpuid))
577 break; 580 break;
578 } 581 }
582 free(cpuid);
583 return map;
584}
585
586/*
587 * From the pmu_events_map, find the table of PMU events that corresponds
588 * to the current running CPU. Then, add all PMU events from that table
589 * as aliases.
590 */
591static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
592{
593 int i;
594 struct pmu_events_map *map;
595 struct pmu_event *pe;
596
597 map = perf_pmu__find_map();
598 if (!map)
599 return;
579 600
580 /* 601 /*
581 * Found a matching PMU events table. Create aliases 602 * Found a matching PMU events table. Create aliases
@@ -585,8 +606,11 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
585 const char *pname; 606 const char *pname;
586 607
587 pe = &map->table[i++]; 608 pe = &map->table[i++];
588 if (!pe->name) 609 if (!pe->name) {
610 if (pe->metric_group || pe->metric_name)
611 continue;
589 break; 612 break;
613 }
590 614
591 pname = pe->pmu ? pe->pmu : "cpu"; 615 pname = pe->pmu ? pe->pmu : "cpu";
592 if (strncmp(pname, name, strlen(pname))) 616 if (strncmp(pname, name, strlen(pname)))
@@ -600,9 +624,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
600 (char *)pe->metric_expr, 624 (char *)pe->metric_expr,
601 (char *)pe->metric_name); 625 (char *)pe->metric_name);
602 } 626 }
603
604out:
605 free(cpuid);
606} 627}
607 628
608struct perf_event_attr * __weak 629struct perf_event_attr * __weak
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index eca99435f4a0..27c75e635866 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -92,4 +92,6 @@ int perf_pmu__test(void);
92 92
93struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); 93struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
94 94
95struct pmu_events_map *perf_pmu__find_map(void);
96
95#endif /* __PMU_H */ 97#endif /* __PMU_H */
diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c
index 779e35c9e566..23e367063446 100644
--- a/tools/perf/util/print_binary.c
+++ b/tools/perf/util/print_binary.c
@@ -3,40 +3,42 @@
3#include <linux/log2.h> 3#include <linux/log2.h>
4#include "sane_ctype.h" 4#include "sane_ctype.h"
5 5
6void print_binary(unsigned char *data, size_t len, 6int binary__fprintf(unsigned char *data, size_t len,
7 size_t bytes_per_line, print_binary_t printer, 7 size_t bytes_per_line, binary__fprintf_t printer,
8 void *extra) 8 void *extra, FILE *fp)
9{ 9{
10 size_t i, j, mask; 10 size_t i, j, mask;
11 int printed = 0;
11 12
12 if (!printer) 13 if (!printer)
13 return; 14 return 0;
14 15
15 bytes_per_line = roundup_pow_of_two(bytes_per_line); 16 bytes_per_line = roundup_pow_of_two(bytes_per_line);
16 mask = bytes_per_line - 1; 17 mask = bytes_per_line - 1;
17 18
18 printer(BINARY_PRINT_DATA_BEGIN, 0, extra); 19 printed += printer(BINARY_PRINT_DATA_BEGIN, 0, extra, fp);
19 for (i = 0; i < len; i++) { 20 for (i = 0; i < len; i++) {
20 if ((i & mask) == 0) { 21 if ((i & mask) == 0) {
21 printer(BINARY_PRINT_LINE_BEGIN, -1, extra); 22 printed += printer(BINARY_PRINT_LINE_BEGIN, -1, extra, fp);
22 printer(BINARY_PRINT_ADDR, i, extra); 23 printed += printer(BINARY_PRINT_ADDR, i, extra, fp);
23 } 24 }
24 25
25 printer(BINARY_PRINT_NUM_DATA, data[i], extra); 26 printed += printer(BINARY_PRINT_NUM_DATA, data[i], extra, fp);
26 27
27 if (((i & mask) == mask) || i == len - 1) { 28 if (((i & mask) == mask) || i == len - 1) {
28 for (j = 0; j < mask-(i & mask); j++) 29 for (j = 0; j < mask-(i & mask); j++)
29 printer(BINARY_PRINT_NUM_PAD, -1, extra); 30 printed += printer(BINARY_PRINT_NUM_PAD, -1, extra, fp);
30 31
31 printer(BINARY_PRINT_SEP, i, extra); 32 printer(BINARY_PRINT_SEP, i, extra, fp);
32 for (j = i & ~mask; j <= i; j++) 33 for (j = i & ~mask; j <= i; j++)
33 printer(BINARY_PRINT_CHAR_DATA, data[j], extra); 34 printed += printer(BINARY_PRINT_CHAR_DATA, data[j], extra, fp);
34 for (j = 0; j < mask-(i & mask); j++) 35 for (j = 0; j < mask-(i & mask); j++)
35 printer(BINARY_PRINT_CHAR_PAD, i, extra); 36 printed += printer(BINARY_PRINT_CHAR_PAD, i, extra, fp);
36 printer(BINARY_PRINT_LINE_END, -1, extra); 37 printed += printer(BINARY_PRINT_LINE_END, -1, extra, fp);
37 } 38 }
38 } 39 }
39 printer(BINARY_PRINT_DATA_END, -1, extra); 40 printed += printer(BINARY_PRINT_DATA_END, -1, extra, fp);
41 return printed;
40} 42}
41 43
42int is_printable_array(char *p, unsigned int len) 44int is_printable_array(char *p, unsigned int len)
diff --git a/tools/perf/util/print_binary.h b/tools/perf/util/print_binary.h
index 2be3075e2b05..2a1554afc957 100644
--- a/tools/perf/util/print_binary.h
+++ b/tools/perf/util/print_binary.h
@@ -3,6 +3,7 @@
3#define PERF_PRINT_BINARY_H 3#define PERF_PRINT_BINARY_H
4 4
5#include <stddef.h> 5#include <stddef.h>
6#include <stdio.h>
6 7
7enum binary_printer_ops { 8enum binary_printer_ops {
8 BINARY_PRINT_DATA_BEGIN, 9 BINARY_PRINT_DATA_BEGIN,
@@ -17,12 +18,19 @@ enum binary_printer_ops {
17 BINARY_PRINT_DATA_END, 18 BINARY_PRINT_DATA_END,
18}; 19};
19 20
20typedef void (*print_binary_t)(enum binary_printer_ops op, 21typedef int (*binary__fprintf_t)(enum binary_printer_ops op,
21 unsigned int val, void *extra); 22 unsigned int val, void *extra, FILE *fp);
22 23
23void print_binary(unsigned char *data, size_t len, 24int binary__fprintf(unsigned char *data, size_t len,
24 size_t bytes_per_line, print_binary_t printer, 25 size_t bytes_per_line, binary__fprintf_t printer,
25 void *extra); 26 void *extra, FILE *fp);
27
28static inline void print_binary(unsigned char *data, size_t len,
29 size_t bytes_per_line, binary__fprintf_t printer,
30 void *extra)
31{
32 binary__fprintf(data, len, bytes_per_line, printer, extra, stdout);
33}
26 34
27int is_printable_array(char *p, unsigned int len); 35int is_printable_array(char *p, unsigned int len);
28 36
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index cdf8d83a484c..4ae1123c6794 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -15,6 +15,7 @@
15 * 15 *
16 */ 16 */
17#include <errno.h> 17#include <errno.h>
18#include <fcntl.h>
18#include <sys/stat.h> 19#include <sys/stat.h>
19#include <sys/types.h> 20#include <sys/types.h>
20#include <sys/uio.h> 21#include <sys/uio.h>
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index e66dc495809a..b4f2f06722a7 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -10,6 +10,7 @@ util/ctype.c
10util/evlist.c 10util/evlist.c
11util/evsel.c 11util/evsel.c
12util/cpumap.c 12util/cpumap.c
13util/mmap.c
13util/namespaces.c 14util/namespaces.c
14../lib/bitmap.c 15../lib/bitmap.c
15../lib/find_bit.c 16../lib/find_bit.c
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
index 7d8972b33f6b..a920f702a74d 100644
--- a/tools/perf/util/rb_resort.h
+++ b/tools/perf/util/rb_resort.h
@@ -144,7 +144,8 @@ struct __name##_sorted *__name = __name##_sorted__new
144 __ilist->rblist.nr_entries) 144 __ilist->rblist.nr_entries)
145 145
146/* For 'struct machine->threads' */ 146/* For 'struct machine->threads' */
147#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \ 147#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \
148 DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads) 148 DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \
149 __machine->threads[hash_bucket].nr)
149 150
150#endif /* _PERF_RESORT_RB_H_ */ 151#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c
new file mode 100644
index 000000000000..5e52e7baa7b6
--- /dev/null
+++ b/tools/perf/util/rwsem.c
@@ -0,0 +1,32 @@
1#include "util.h"
2#include "rwsem.h"
3
4int init_rwsem(struct rw_semaphore *sem)
5{
6 return pthread_rwlock_init(&sem->lock, NULL);
7}
8
9int exit_rwsem(struct rw_semaphore *sem)
10{
11 return pthread_rwlock_destroy(&sem->lock);
12}
13
14int down_read(struct rw_semaphore *sem)
15{
16 return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock);
17}
18
19int up_read(struct rw_semaphore *sem)
20{
21 return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
22}
23
24int down_write(struct rw_semaphore *sem)
25{
26 return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock);
27}
28
29int up_write(struct rw_semaphore *sem)
30{
31 return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
32}
diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h
new file mode 100644
index 000000000000..94565ad4d494
--- /dev/null
+++ b/tools/perf/util/rwsem.h
@@ -0,0 +1,19 @@
1#ifndef _PERF_RWSEM_H
2#define _PERF_RWSEM_H
3
4#include <pthread.h>
5
6struct rw_semaphore {
7 pthread_rwlock_t lock;
8};
9
10int init_rwsem(struct rw_semaphore *sem);
11int exit_rwsem(struct rw_semaphore *sem);
12
13int down_read(struct rw_semaphore *sem);
14int up_read(struct rw_semaphore *sem);
15
16int down_write(struct rw_semaphore *sem);
17int up_write(struct rw_semaphore *sem);
18
19#endif /* _PERF_RWSEM_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index da55081aefc6..5c412310f266 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -33,14 +33,14 @@ static int perf_session__deliver_event(struct perf_session *session,
33 33
34static int perf_session__open(struct perf_session *session) 34static int perf_session__open(struct perf_session *session)
35{ 35{
36 struct perf_data_file *file = session->file; 36 struct perf_data *data = session->data;
37 37
38 if (perf_session__read_header(session) < 0) { 38 if (perf_session__read_header(session) < 0) {
39 pr_err("incompatible file format (rerun with -v to learn more)\n"); 39 pr_err("incompatible file format (rerun with -v to learn more)\n");
40 return -1; 40 return -1;
41 } 41 }
42 42
43 if (perf_data_file__is_pipe(file)) 43 if (perf_data__is_pipe(data))
44 return 0; 44 return 0;
45 45
46 if (perf_header__has_feat(&session->header, HEADER_STAT)) 46 if (perf_header__has_feat(&session->header, HEADER_STAT))
@@ -121,7 +121,7 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
121 session->tool, event->file_offset); 121 session->tool, event->file_offset);
122} 122}
123 123
124struct perf_session *perf_session__new(struct perf_data_file *file, 124struct perf_session *perf_session__new(struct perf_data *data,
125 bool repipe, struct perf_tool *tool) 125 bool repipe, struct perf_tool *tool)
126{ 126{
127 struct perf_session *session = zalloc(sizeof(*session)); 127 struct perf_session *session = zalloc(sizeof(*session));
@@ -135,13 +135,13 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
135 machines__init(&session->machines); 135 machines__init(&session->machines);
136 ordered_events__init(&session->ordered_events, ordered_events__deliver_event); 136 ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
137 137
138 if (file) { 138 if (data) {
139 if (perf_data_file__open(file)) 139 if (perf_data__open(data))
140 goto out_delete; 140 goto out_delete;
141 141
142 session->file = file; 142 session->data = data;
143 143
144 if (perf_data_file__is_read(file)) { 144 if (perf_data__is_read(data)) {
145 if (perf_session__open(session) < 0) 145 if (perf_session__open(session) < 0)
146 goto out_close; 146 goto out_close;
147 147
@@ -149,7 +149,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
149 * set session attributes that are present in perf.data 149 * set session attributes that are present in perf.data
150 * but not in pipe-mode. 150 * but not in pipe-mode.
151 */ 151 */
152 if (!file->is_pipe) { 152 if (!data->is_pipe) {
153 perf_session__set_id_hdr_size(session); 153 perf_session__set_id_hdr_size(session);
154 perf_session__set_comm_exec(session); 154 perf_session__set_comm_exec(session);
155 } 155 }
@@ -158,7 +158,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
158 session->machines.host.env = &perf_env; 158 session->machines.host.env = &perf_env;
159 } 159 }
160 160
161 if (!file || perf_data_file__is_write(file)) { 161 if (!data || perf_data__is_write(data)) {
162 /* 162 /*
163 * In O_RDONLY mode this will be performed when reading the 163 * In O_RDONLY mode this will be performed when reading the
164 * kernel MMAP event, in perf_event__process_mmap(). 164 * kernel MMAP event, in perf_event__process_mmap().
@@ -171,7 +171,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
171 * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is 171 * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
172 * processed, so perf_evlist__sample_id_all is not meaningful here. 172 * processed, so perf_evlist__sample_id_all is not meaningful here.
173 */ 173 */
174 if ((!file || !file->is_pipe) && tool && tool->ordering_requires_timestamps && 174 if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
175 tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) { 175 tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) {
176 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); 176 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
177 tool->ordered_events = false; 177 tool->ordered_events = false;
@@ -180,7 +180,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
180 return session; 180 return session;
181 181
182 out_close: 182 out_close:
183 perf_data_file__close(file); 183 perf_data__close(data);
184 out_delete: 184 out_delete:
185 perf_session__delete(session); 185 perf_session__delete(session);
186 out: 186 out:
@@ -202,8 +202,8 @@ void perf_session__delete(struct perf_session *session)
202 perf_session__delete_threads(session); 202 perf_session__delete_threads(session);
203 perf_env__exit(&session->header.env); 203 perf_env__exit(&session->header.env);
204 machines__exit(&session->machines); 204 machines__exit(&session->machines);
205 if (session->file) 205 if (session->data)
206 perf_data_file__close(session->file); 206 perf_data__close(session->data);
207 free(session); 207 free(session);
208} 208}
209 209
@@ -291,8 +291,8 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
291 __maybe_unused) 291 __maybe_unused)
292{ 292{
293 dump_printf(": unhandled!\n"); 293 dump_printf(": unhandled!\n");
294 if (perf_data_file__is_pipe(session->file)) 294 if (perf_data__is_pipe(session->data))
295 skipn(perf_data_file__fd(session->file), event->auxtrace.size); 295 skipn(perf_data__fd(session->data), event->auxtrace.size);
296 return event->auxtrace.size; 296 return event->auxtrace.size;
297} 297}
298 298
@@ -1350,7 +1350,7 @@ static s64 perf_session__process_user_event(struct perf_session *session,
1350{ 1350{
1351 struct ordered_events *oe = &session->ordered_events; 1351 struct ordered_events *oe = &session->ordered_events;
1352 struct perf_tool *tool = session->tool; 1352 struct perf_tool *tool = session->tool;
1353 int fd = perf_data_file__fd(session->file); 1353 int fd = perf_data__fd(session->data);
1354 int err; 1354 int err;
1355 1355
1356 dump_event(session->evlist, event, file_offset, NULL); 1356 dump_event(session->evlist, event, file_offset, NULL);
@@ -1450,10 +1450,10 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
1450 goto out_parse_sample; 1450 goto out_parse_sample;
1451 } 1451 }
1452 1452
1453 if (perf_data_file__is_pipe(session->file)) 1453 if (perf_data__is_pipe(session->data))
1454 return -1; 1454 return -1;
1455 1455
1456 fd = perf_data_file__fd(session->file); 1456 fd = perf_data__fd(session->data);
1457 hdr_sz = sizeof(struct perf_event_header); 1457 hdr_sz = sizeof(struct perf_event_header);
1458 1458
1459 if (buf_sz < hdr_sz) 1459 if (buf_sz < hdr_sz)
@@ -1688,7 +1688,7 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
1688{ 1688{
1689 struct ordered_events *oe = &session->ordered_events; 1689 struct ordered_events *oe = &session->ordered_events;
1690 struct perf_tool *tool = session->tool; 1690 struct perf_tool *tool = session->tool;
1691 int fd = perf_data_file__fd(session->file); 1691 int fd = perf_data__fd(session->data);
1692 union perf_event *event; 1692 union perf_event *event;
1693 uint32_t size, cur_size = 0; 1693 uint32_t size, cur_size = 0;
1694 void *buf = NULL; 1694 void *buf = NULL;
@@ -1829,7 +1829,7 @@ static int __perf_session__process_events(struct perf_session *session,
1829{ 1829{
1830 struct ordered_events *oe = &session->ordered_events; 1830 struct ordered_events *oe = &session->ordered_events;
1831 struct perf_tool *tool = session->tool; 1831 struct perf_tool *tool = session->tool;
1832 int fd = perf_data_file__fd(session->file); 1832 int fd = perf_data__fd(session->data);
1833 u64 head, page_offset, file_offset, file_pos, size; 1833 u64 head, page_offset, file_offset, file_pos, size;
1834 int err, mmap_prot, mmap_flags, map_idx = 0; 1834 int err, mmap_prot, mmap_flags, map_idx = 0;
1835 size_t mmap_size; 1835 size_t mmap_size;
@@ -1850,7 +1850,7 @@ static int __perf_session__process_events(struct perf_session *session,
1850 if (data_offset + data_size < file_size) 1850 if (data_offset + data_size < file_size)
1851 file_size = data_offset + data_size; 1851 file_size = data_offset + data_size;
1852 1852
1853 ui_progress__init(&prog, file_size, "Processing events..."); 1853 ui_progress__init_size(&prog, file_size, "Processing events...");
1854 1854
1855 mmap_size = MMAP_SIZE; 1855 mmap_size = MMAP_SIZE;
1856 if (mmap_size > file_size) { 1856 if (mmap_size > file_size) {
@@ -1946,13 +1946,13 @@ out_err:
1946 1946
1947int perf_session__process_events(struct perf_session *session) 1947int perf_session__process_events(struct perf_session *session)
1948{ 1948{
1949 u64 size = perf_data_file__size(session->file); 1949 u64 size = perf_data__size(session->data);
1950 int err; 1950 int err;
1951 1951
1952 if (perf_session__register_idle_thread(session) < 0) 1952 if (perf_session__register_idle_thread(session) < 0)
1953 return -ENOMEM; 1953 return -ENOMEM;
1954 1954
1955 if (!perf_data_file__is_pipe(session->file)) 1955 if (!perf_data__is_pipe(session->data))
1956 err = __perf_session__process_events(session, 1956 err = __perf_session__process_events(session,
1957 session->header.data_offset, 1957 session->header.data_offset,
1958 session->header.data_size, size); 1958 session->header.data_size, size);
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 3f63ee12471d..da1434a7c120 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -33,13 +33,13 @@ struct perf_session {
33 void *one_mmap_addr; 33 void *one_mmap_addr;
34 u64 one_mmap_offset; 34 u64 one_mmap_offset;
35 struct ordered_events ordered_events; 35 struct ordered_events ordered_events;
36 struct perf_data_file *file; 36 struct perf_data *data;
37 struct perf_tool *tool; 37 struct perf_tool *tool;
38}; 38};
39 39
40struct perf_tool; 40struct perf_tool;
41 41
42struct perf_session *perf_session__new(struct perf_data_file *file, 42struct perf_session *perf_session__new(struct perf_data *data,
43 bool repipe, struct perf_tool *tool); 43 bool repipe, struct perf_tool *tool);
44void perf_session__delete(struct perf_session *session); 44void perf_session__delete(struct perf_session *session);
45 45
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 84a33f1e9ec9..a00eacdf02ed 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -226,6 +226,9 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
226 if (sym_l == sym_r) 226 if (sym_l == sym_r)
227 return 0; 227 return 0;
228 228
229 if (sym_l->inlined || sym_r->inlined)
230 return strcmp(sym_l->name, sym_r->name);
231
229 if (sym_l->start != sym_r->start) 232 if (sym_l->start != sym_r->start)
230 return (int64_t)(sym_r->start - sym_l->start); 233 return (int64_t)(sym_r->start - sym_l->start);
231 234
@@ -284,6 +287,9 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
284 ret += repsep_snprintf(bf + ret, size - ret, "%.*s", 287 ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
285 width - ret, 288 width - ret,
286 sym->name); 289 sym->name);
290 if (sym->inlined)
291 ret += repsep_snprintf(bf + ret, size - ret,
292 " (inlined)");
287 } 293 }
288 } else { 294 } else {
289 size_t len = BITS_PER_LONG / 4; 295 size_t len = BITS_PER_LONG / 4;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index b2b55e5149a7..f5901c10a563 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -130,7 +130,6 @@ struct hist_entry {
130 }; 130 };
131 char *srcline; 131 char *srcline;
132 char *srcfile; 132 char *srcfile;
133 struct inline_node *inline_node;
134 struct symbol *parent; 133 struct symbol *parent;
135 struct branch_info *branch_info; 134 struct branch_info *branch_info;
136 struct hists *hists; 135 struct hists *hists;
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 4105682afc7a..d19f05c56de6 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -11,7 +11,7 @@
11#include "util/debug.h" 11#include "util/debug.h"
12#include "util/callchain.h" 12#include "util/callchain.h"
13#include "srcline.h" 13#include "srcline.h"
14 14#include "string2.h"
15#include "symbol.h" 15#include "symbol.h"
16 16
17bool srcline_full_filename; 17bool srcline_full_filename;
@@ -34,28 +34,17 @@ static const char *dso__name(struct dso *dso)
34 return dso_name; 34 return dso_name;
35} 35}
36 36
37static int inline_list__append(char *filename, char *funcname, int line_nr, 37static int inline_list__append(struct symbol *symbol, char *srcline,
38 struct inline_node *node, struct dso *dso) 38 struct inline_node *node)
39{ 39{
40 struct inline_list *ilist; 40 struct inline_list *ilist;
41 char *demangled;
42 41
43 ilist = zalloc(sizeof(*ilist)); 42 ilist = zalloc(sizeof(*ilist));
44 if (ilist == NULL) 43 if (ilist == NULL)
45 return -1; 44 return -1;
46 45
47 ilist->filename = filename; 46 ilist->symbol = symbol;
48 ilist->line_nr = line_nr; 47 ilist->srcline = srcline;
49
50 if (dso != NULL) {
51 demangled = dso__demangle_sym(dso, 0, funcname);
52 if (demangled == NULL) {
53 ilist->funcname = funcname;
54 } else {
55 ilist->funcname = demangled;
56 free(funcname);
57 }
58 }
59 48
60 if (callchain_param.order == ORDER_CALLEE) 49 if (callchain_param.order == ORDER_CALLEE)
61 list_add_tail(&ilist->list, &node->val); 50 list_add_tail(&ilist->list, &node->val);
@@ -65,6 +54,65 @@ static int inline_list__append(char *filename, char *funcname, int line_nr,
65 return 0; 54 return 0;
66} 55}
67 56
57/* basename version that takes a const input string */
58static const char *gnu_basename(const char *path)
59{
60 const char *base = strrchr(path, '/');
61
62 return base ? base + 1 : path;
63}
64
65static char *srcline_from_fileline(const char *file, unsigned int line)
66{
67 char *srcline;
68
69 if (!file)
70 return NULL;
71
72 if (!srcline_full_filename)
73 file = gnu_basename(file);
74
75 if (asprintf(&srcline, "%s:%u", file, line) < 0)
76 return NULL;
77
78 return srcline;
79}
80
81static struct symbol *new_inline_sym(struct dso *dso,
82 struct symbol *base_sym,
83 const char *funcname)
84{
85 struct symbol *inline_sym;
86 char *demangled = NULL;
87
88 if (dso) {
89 demangled = dso__demangle_sym(dso, 0, funcname);
90 if (demangled)
91 funcname = demangled;
92 }
93
94 if (base_sym && strcmp(funcname, base_sym->name) == 0) {
95 /* reuse the real, existing symbol */
96 inline_sym = base_sym;
97 /* ensure that we don't alias an inlined symbol, which could
98 * lead to double frees in inline_node__delete
99 */
100 assert(!base_sym->inlined);
101 } else {
102 /* create a fake symbol for the inline frame */
103 inline_sym = symbol__new(base_sym ? base_sym->start : 0,
104 base_sym ? base_sym->end : 0,
105 base_sym ? base_sym->binding : 0,
106 funcname);
107 if (inline_sym)
108 inline_sym->inlined = 1;
109 }
110
111 free(demangled);
112
113 return inline_sym;
114}
115
68#ifdef HAVE_LIBBFD_SUPPORT 116#ifdef HAVE_LIBBFD_SUPPORT
69 117
70/* 118/*
@@ -208,18 +256,23 @@ static void addr2line_cleanup(struct a2l_data *a2l)
208#define MAX_INLINE_NEST 1024 256#define MAX_INLINE_NEST 1024
209 257
210static int inline_list__append_dso_a2l(struct dso *dso, 258static int inline_list__append_dso_a2l(struct dso *dso,
211 struct inline_node *node) 259 struct inline_node *node,
260 struct symbol *sym)
212{ 261{
213 struct a2l_data *a2l = dso->a2l; 262 struct a2l_data *a2l = dso->a2l;
214 char *funcname = a2l->funcname ? strdup(a2l->funcname) : NULL; 263 struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname);
215 char *filename = a2l->filename ? strdup(a2l->filename) : NULL; 264 char *srcline = NULL;
216 265
217 return inline_list__append(filename, funcname, a2l->line, node, dso); 266 if (a2l->filename)
267 srcline = srcline_from_fileline(a2l->filename, a2l->line);
268
269 return inline_list__append(inline_sym, srcline, node);
218} 270}
219 271
220static int addr2line(const char *dso_name, u64 addr, 272static int addr2line(const char *dso_name, u64 addr,
221 char **file, unsigned int *line, struct dso *dso, 273 char **file, unsigned int *line, struct dso *dso,
222 bool unwind_inlines, struct inline_node *node) 274 bool unwind_inlines, struct inline_node *node,
275 struct symbol *sym)
223{ 276{
224 int ret = 0; 277 int ret = 0;
225 struct a2l_data *a2l = dso->a2l; 278 struct a2l_data *a2l = dso->a2l;
@@ -245,7 +298,7 @@ static int addr2line(const char *dso_name, u64 addr,
245 if (unwind_inlines) { 298 if (unwind_inlines) {
246 int cnt = 0; 299 int cnt = 0;
247 300
248 if (node && inline_list__append_dso_a2l(dso, node)) 301 if (node && inline_list__append_dso_a2l(dso, node, sym))
249 return 0; 302 return 0;
250 303
251 while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, 304 while (bfd_find_inliner_info(a2l->abfd, &a2l->filename,
@@ -256,7 +309,7 @@ static int addr2line(const char *dso_name, u64 addr,
256 a2l->filename = NULL; 309 a2l->filename = NULL;
257 310
258 if (node != NULL) { 311 if (node != NULL) {
259 if (inline_list__append_dso_a2l(dso, node)) 312 if (inline_list__append_dso_a2l(dso, node, sym))
260 return 0; 313 return 0;
261 // found at least one inline frame 314 // found at least one inline frame
262 ret = 1; 315 ret = 1;
@@ -288,7 +341,7 @@ void dso__free_a2l(struct dso *dso)
288} 341}
289 342
290static struct inline_node *addr2inlines(const char *dso_name, u64 addr, 343static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
291 struct dso *dso) 344 struct dso *dso, struct symbol *sym)
292{ 345{
293 struct inline_node *node; 346 struct inline_node *node;
294 347
@@ -301,17 +354,8 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
301 INIT_LIST_HEAD(&node->val); 354 INIT_LIST_HEAD(&node->val);
302 node->addr = addr; 355 node->addr = addr;
303 356
304 if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, node)) 357 addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym);
305 goto out_free_inline_node;
306
307 if (list_empty(&node->val))
308 goto out_free_inline_node;
309
310 return node; 358 return node;
311
312out_free_inline_node:
313 inline_node__delete(node);
314 return NULL;
315} 359}
316 360
317#else /* HAVE_LIBBFD_SUPPORT */ 361#else /* HAVE_LIBBFD_SUPPORT */
@@ -341,7 +385,8 @@ static int addr2line(const char *dso_name, u64 addr,
341 char **file, unsigned int *line_nr, 385 char **file, unsigned int *line_nr,
342 struct dso *dso __maybe_unused, 386 struct dso *dso __maybe_unused,
343 bool unwind_inlines __maybe_unused, 387 bool unwind_inlines __maybe_unused,
344 struct inline_node *node __maybe_unused) 388 struct inline_node *node __maybe_unused,
389 struct symbol *sym __maybe_unused)
345{ 390{
346 FILE *fp; 391 FILE *fp;
347 char cmd[PATH_MAX]; 392 char cmd[PATH_MAX];
@@ -381,16 +426,18 @@ void dso__free_a2l(struct dso *dso __maybe_unused)
381} 426}
382 427
383static struct inline_node *addr2inlines(const char *dso_name, u64 addr, 428static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
384 struct dso *dso __maybe_unused) 429 struct dso *dso __maybe_unused,
430 struct symbol *sym)
385{ 431{
386 FILE *fp; 432 FILE *fp;
387 char cmd[PATH_MAX]; 433 char cmd[PATH_MAX];
388 struct inline_node *node; 434 struct inline_node *node;
389 char *filename = NULL; 435 char *filename = NULL;
390 size_t len; 436 char *funcname = NULL;
437 size_t filelen, funclen;
391 unsigned int line_nr = 0; 438 unsigned int line_nr = 0;
392 439
393 scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i %016"PRIx64, 440 scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i -f %016"PRIx64,
394 dso_name, addr); 441 dso_name, addr);
395 442
396 fp = popen(cmd, "r"); 443 fp = popen(cmd, "r");
@@ -408,26 +455,34 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
408 INIT_LIST_HEAD(&node->val); 455 INIT_LIST_HEAD(&node->val);
409 node->addr = addr; 456 node->addr = addr;
410 457
411 while (getline(&filename, &len, fp) != -1) { 458 /* addr2line -f generates two lines for each inlined functions */
412 if (filename_split(filename, &line_nr) != 1) { 459 while (getline(&funcname, &funclen, fp) != -1) {
413 free(filename); 460 char *srcline;
461 struct symbol *inline_sym;
462
463 rtrim(funcname);
464
465 if (getline(&filename, &filelen, fp) == -1)
414 goto out; 466 goto out;
415 }
416 467
417 if (inline_list__append(filename, NULL, line_nr, node, 468 if (filename_split(filename, &line_nr) != 1)
418 NULL) != 0)
419 goto out; 469 goto out;
420 470
421 filename = NULL; 471 srcline = srcline_from_fileline(filename, line_nr);
472 inline_sym = new_inline_sym(dso, sym, funcname);
473
474 if (inline_list__append(inline_sym, srcline, node) != 0) {
475 free(srcline);
476 if (inline_sym && inline_sym->inlined)
477 symbol__delete(inline_sym);
478 goto out;
479 }
422 } 480 }
423 481
424out: 482out:
425 pclose(fp); 483 pclose(fp);
426 484 free(filename);
427 if (list_empty(&node->val)) { 485 free(funcname);
428 inline_node__delete(node);
429 return NULL;
430 }
431 486
432 return node; 487 return node;
433} 488}
@@ -455,19 +510,18 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
455 if (dso_name == NULL) 510 if (dso_name == NULL)
456 goto out; 511 goto out;
457 512
458 if (!addr2line(dso_name, addr, &file, &line, dso, unwind_inlines, NULL)) 513 if (!addr2line(dso_name, addr, &file, &line, dso,
514 unwind_inlines, NULL, sym))
459 goto out; 515 goto out;
460 516
461 if (asprintf(&srcline, "%s:%u", 517 srcline = srcline_from_fileline(file, line);
462 srcline_full_filename ? file : basename(file), 518 free(file);
463 line) < 0) { 519
464 free(file); 520 if (!srcline)
465 goto out; 521 goto out;
466 }
467 522
468 dso->a2l_fails = 0; 523 dso->a2l_fails = 0;
469 524
470 free(file);
471 return srcline; 525 return srcline;
472 526
473out: 527out:
@@ -501,7 +555,74 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
501 return __get_srcline(dso, addr, sym, show_sym, show_addr, false); 555 return __get_srcline(dso, addr, sym, show_sym, show_addr, false);
502} 556}
503 557
504struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr) 558struct srcline_node {
559 u64 addr;
560 char *srcline;
561 struct rb_node rb_node;
562};
563
564void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline)
565{
566 struct rb_node **p = &tree->rb_node;
567 struct rb_node *parent = NULL;
568 struct srcline_node *i, *node;
569
570 node = zalloc(sizeof(struct srcline_node));
571 if (!node) {
572 perror("not enough memory for the srcline node");
573 return;
574 }
575
576 node->addr = addr;
577 node->srcline = srcline;
578
579 while (*p != NULL) {
580 parent = *p;
581 i = rb_entry(parent, struct srcline_node, rb_node);
582 if (addr < i->addr)
583 p = &(*p)->rb_left;
584 else
585 p = &(*p)->rb_right;
586 }
587 rb_link_node(&node->rb_node, parent, p);
588 rb_insert_color(&node->rb_node, tree);
589}
590
591char *srcline__tree_find(struct rb_root *tree, u64 addr)
592{
593 struct rb_node *n = tree->rb_node;
594
595 while (n) {
596 struct srcline_node *i = rb_entry(n, struct srcline_node,
597 rb_node);
598
599 if (addr < i->addr)
600 n = n->rb_left;
601 else if (addr > i->addr)
602 n = n->rb_right;
603 else
604 return i->srcline;
605 }
606
607 return NULL;
608}
609
610void srcline__tree_delete(struct rb_root *tree)
611{
612 struct srcline_node *pos;
613 struct rb_node *next = rb_first(tree);
614
615 while (next) {
616 pos = rb_entry(next, struct srcline_node, rb_node);
617 next = rb_next(&pos->rb_node);
618 rb_erase(&pos->rb_node, tree);
619 free_srcline(pos->srcline);
620 zfree(&pos);
621 }
622}
623
624struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
625 struct symbol *sym)
505{ 626{
506 const char *dso_name; 627 const char *dso_name;
507 628
@@ -509,7 +630,7 @@ struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr)
509 if (dso_name == NULL) 630 if (dso_name == NULL)
510 return NULL; 631 return NULL;
511 632
512 return addr2inlines(dso_name, addr, dso); 633 return addr2inlines(dso_name, addr, dso, sym);
513} 634}
514 635
515void inline_node__delete(struct inline_node *node) 636void inline_node__delete(struct inline_node *node)
@@ -518,10 +639,63 @@ void inline_node__delete(struct inline_node *node)
518 639
519 list_for_each_entry_safe(ilist, tmp, &node->val, list) { 640 list_for_each_entry_safe(ilist, tmp, &node->val, list) {
520 list_del_init(&ilist->list); 641 list_del_init(&ilist->list);
521 zfree(&ilist->filename); 642 free_srcline(ilist->srcline);
522 zfree(&ilist->funcname); 643 /* only the inlined symbols are owned by the list */
644 if (ilist->symbol && ilist->symbol->inlined)
645 symbol__delete(ilist->symbol);
523 free(ilist); 646 free(ilist);
524 } 647 }
525 648
526 free(node); 649 free(node);
527} 650}
651
652void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines)
653{
654 struct rb_node **p = &tree->rb_node;
655 struct rb_node *parent = NULL;
656 const u64 addr = inlines->addr;
657 struct inline_node *i;
658
659 while (*p != NULL) {
660 parent = *p;
661 i = rb_entry(parent, struct inline_node, rb_node);
662 if (addr < i->addr)
663 p = &(*p)->rb_left;
664 else
665 p = &(*p)->rb_right;
666 }
667 rb_link_node(&inlines->rb_node, parent, p);
668 rb_insert_color(&inlines->rb_node, tree);
669}
670
671struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr)
672{
673 struct rb_node *n = tree->rb_node;
674
675 while (n) {
676 struct inline_node *i = rb_entry(n, struct inline_node,
677 rb_node);
678
679 if (addr < i->addr)
680 n = n->rb_left;
681 else if (addr > i->addr)
682 n = n->rb_right;
683 else
684 return i;
685 }
686
687 return NULL;
688}
689
690void inlines__tree_delete(struct rb_root *tree)
691{
692 struct inline_node *pos;
693 struct rb_node *next = rb_first(tree);
694
695 while (next) {
696 pos = rb_entry(next, struct inline_node, rb_node);
697 next = rb_next(&pos->rb_node);
698 rb_erase(&pos->rb_node, tree);
699 inline_node__delete(pos);
700 }
701}
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
index 8e73f607dfa3..847b7086182c 100644
--- a/tools/perf/util/srcline.h
+++ b/tools/perf/util/srcline.h
@@ -3,6 +3,7 @@
3#define PERF_SRCLINE_H 3#define PERF_SRCLINE_H
4 4
5#include <linux/list.h> 5#include <linux/list.h>
6#include <linux/rbtree.h>
6#include <linux/types.h> 7#include <linux/types.h>
7 8
8struct dso; 9struct dso;
@@ -15,21 +16,38 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
15 bool show_sym, bool show_addr, bool unwind_inlines); 16 bool show_sym, bool show_addr, bool unwind_inlines);
16void free_srcline(char *srcline); 17void free_srcline(char *srcline);
17 18
19/* insert the srcline into the DSO, which will take ownership */
20void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline);
21/* find previously inserted srcline */
22char *srcline__tree_find(struct rb_root *tree, u64 addr);
23/* delete all srclines within the tree */
24void srcline__tree_delete(struct rb_root *tree);
25
18#define SRCLINE_UNKNOWN ((char *) "??:0") 26#define SRCLINE_UNKNOWN ((char *) "??:0")
19 27
20struct inline_list { 28struct inline_list {
21 char *filename; 29 struct symbol *symbol;
22 char *funcname; 30 char *srcline;
23 unsigned int line_nr;
24 struct list_head list; 31 struct list_head list;
25}; 32};
26 33
27struct inline_node { 34struct inline_node {
28 u64 addr; 35 u64 addr;
29 struct list_head val; 36 struct list_head val;
37 struct rb_node rb_node;
30}; 38};
31 39
32struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr); 40/* parse inlined frames for the given address */
41struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
42 struct symbol *sym);
43/* free resources associated to the inline node list */
33void inline_node__delete(struct inline_node *node); 44void inline_node__delete(struct inline_node *node);
34 45
46/* insert the inline node list into the DSO, which will take ownership */
47void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines);
48/* find previously inserted inline node list */
49struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr);
50/* delete all nodes within the tree of inline_node s */
51void inlines__tree_delete(struct rb_root *tree);
52
35#endif /* PERF_SRCLINE_H */ 53#endif /* PERF_SRCLINE_H */
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 37363869c9a1..855e35cbb1dc 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -7,6 +7,7 @@
7#include "rblist.h" 7#include "rblist.h"
8#include "evlist.h" 8#include "evlist.h"
9#include "expr.h" 9#include "expr.h"
10#include "metricgroup.h"
10 11
11enum { 12enum {
12 CTX_BIT_USER = 1 << 0, 13 CTX_BIT_USER = 1 << 0,
@@ -56,7 +57,6 @@ struct saved_value {
56 struct rb_node rb_node; 57 struct rb_node rb_node;
57 struct perf_evsel *evsel; 58 struct perf_evsel *evsel;
58 int cpu; 59 int cpu;
59 int ctx;
60 struct stats stats; 60 struct stats stats;
61}; 61};
62 62
@@ -67,8 +67,6 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
67 rb_node); 67 rb_node);
68 const struct saved_value *b = entry; 68 const struct saved_value *b = entry;
69 69
70 if (a->ctx != b->ctx)
71 return a->ctx - b->ctx;
72 if (a->cpu != b->cpu) 70 if (a->cpu != b->cpu)
73 return a->cpu - b->cpu; 71 return a->cpu - b->cpu;
74 if (a->evsel == b->evsel) 72 if (a->evsel == b->evsel)
@@ -90,13 +88,12 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
90} 88}
91 89
92static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, 90static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
93 int cpu, int ctx, 91 int cpu,
94 bool create) 92 bool create)
95{ 93{
96 struct rb_node *nd; 94 struct rb_node *nd;
97 struct saved_value dm = { 95 struct saved_value dm = {
98 .cpu = cpu, 96 .cpu = cpu,
99 .ctx = ctx,
100 .evsel = evsel, 97 .evsel = evsel,
101 }; 98 };
102 nd = rblist__find(&runtime_saved_values, &dm); 99 nd = rblist__find(&runtime_saved_values, &dm);
@@ -182,59 +179,60 @@ void perf_stat__reset_shadow_stats(void)
182 * more semantic information such as miss/hit ratios, 179 * more semantic information such as miss/hit ratios,
183 * instruction rates, etc: 180 * instruction rates, etc:
184 */ 181 */
185void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, 182void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
186 int cpu) 183 int cpu)
187{ 184{
188 int ctx = evsel_context(counter); 185 int ctx = evsel_context(counter);
189 186
187 count *= counter->scale;
188
190 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || 189 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
191 perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) 190 perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
192 update_stats(&runtime_nsecs_stats[cpu], count[0]); 191 update_stats(&runtime_nsecs_stats[cpu], count);
193 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 192 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
194 update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); 193 update_stats(&runtime_cycles_stats[ctx][cpu], count);
195 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) 194 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
196 update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]); 195 update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count);
197 else if (perf_stat_evsel__is(counter, TRANSACTION_START)) 196 else if (perf_stat_evsel__is(counter, TRANSACTION_START))
198 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); 197 update_stats(&runtime_transaction_stats[ctx][cpu], count);
199 else if (perf_stat_evsel__is(counter, ELISION_START)) 198 else if (perf_stat_evsel__is(counter, ELISION_START))
200 update_stats(&runtime_elision_stats[ctx][cpu], count[0]); 199 update_stats(&runtime_elision_stats[ctx][cpu], count);
201 else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) 200 else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
202 update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]); 201 update_stats(&runtime_topdown_total_slots[ctx][cpu], count);
203 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) 202 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
204 update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]); 203 update_stats(&runtime_topdown_slots_issued[ctx][cpu], count);
205 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) 204 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
206 update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]); 205 update_stats(&runtime_topdown_slots_retired[ctx][cpu], count);
207 else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) 206 else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
208 update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]); 207 update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count);
209 else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) 208 else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
210 update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]); 209 update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count);
211 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 210 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
212 update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); 211 update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count);
213 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 212 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
214 update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); 213 update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count);
215 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 214 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
216 update_stats(&runtime_branches_stats[ctx][cpu], count[0]); 215 update_stats(&runtime_branches_stats[ctx][cpu], count);
217 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 216 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
218 update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); 217 update_stats(&runtime_cacherefs_stats[ctx][cpu], count);
219 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) 218 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
220 update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); 219 update_stats(&runtime_l1_dcache_stats[ctx][cpu], count);
221 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) 220 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
222 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); 221 update_stats(&runtime_ll_cache_stats[ctx][cpu], count);
223 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) 222 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
224 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); 223 update_stats(&runtime_ll_cache_stats[ctx][cpu], count);
225 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) 224 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
226 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); 225 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count);
227 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 226 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
228 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); 227 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count);
229 else if (perf_stat_evsel__is(counter, SMI_NUM)) 228 else if (perf_stat_evsel__is(counter, SMI_NUM))
230 update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]); 229 update_stats(&runtime_smi_num_stats[ctx][cpu], count);
231 else if (perf_stat_evsel__is(counter, APERF)) 230 else if (perf_stat_evsel__is(counter, APERF))
232 update_stats(&runtime_aperf_stats[ctx][cpu], count[0]); 231 update_stats(&runtime_aperf_stats[ctx][cpu], count);
233 232
234 if (counter->collect_stat) { 233 if (counter->collect_stat) {
235 struct saved_value *v = saved_value_lookup(counter, cpu, ctx, 234 struct saved_value *v = saved_value_lookup(counter, cpu, true);
236 true); 235 update_stats(&v->stats, count);
237 update_stats(&v->stats, count[0]);
238 } 236 }
239} 237}
240 238
@@ -628,15 +626,68 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel,
628 out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); 626 out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
629} 627}
630 628
629static void generic_metric(const char *metric_expr,
630 struct perf_evsel **metric_events,
631 char *name,
632 const char *metric_name,
633 double avg,
634 int cpu,
635 struct perf_stat_output_ctx *out)
636{
637 print_metric_t print_metric = out->print_metric;
638 struct parse_ctx pctx;
639 double ratio;
640 int i;
641 void *ctxp = out->ctx;
642
643 expr__ctx_init(&pctx);
644 expr__add_id(&pctx, name, avg);
645 for (i = 0; metric_events[i]; i++) {
646 struct saved_value *v;
647 struct stats *stats;
648 double scale;
649
650 if (!strcmp(metric_events[i]->name, "duration_time")) {
651 stats = &walltime_nsecs_stats;
652 scale = 1e-9;
653 } else {
654 v = saved_value_lookup(metric_events[i], cpu, false);
655 if (!v)
656 break;
657 stats = &v->stats;
658 scale = 1.0;
659 }
660 expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
661 }
662 if (!metric_events[i]) {
663 const char *p = metric_expr;
664
665 if (expr__parse(&ratio, &pctx, &p) == 0)
666 print_metric(ctxp, NULL, "%8.1f",
667 metric_name ?
668 metric_name :
669 out->force_header ? name : "",
670 ratio);
671 else
672 print_metric(ctxp, NULL, NULL,
673 out->force_header ?
674 (metric_name ? metric_name : name) : "", 0);
675 } else
676 print_metric(ctxp, NULL, NULL, "", 0);
677}
678
631void perf_stat__print_shadow_stats(struct perf_evsel *evsel, 679void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
632 double avg, int cpu, 680 double avg, int cpu,
633 struct perf_stat_output_ctx *out) 681 struct perf_stat_output_ctx *out,
682 struct rblist *metric_events)
634{ 683{
635 void *ctxp = out->ctx; 684 void *ctxp = out->ctx;
636 print_metric_t print_metric = out->print_metric; 685 print_metric_t print_metric = out->print_metric;
637 double total, ratio = 0.0, total2; 686 double total, ratio = 0.0, total2;
638 const char *color = NULL; 687 const char *color = NULL;
639 int ctx = evsel_context(evsel); 688 int ctx = evsel_context(evsel);
689 struct metric_event *me;
690 int num = 1;
640 691
641 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 692 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
642 total = avg_stats(&runtime_cycles_stats[ctx][cpu]); 693 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
@@ -820,33 +871,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
820 else 871 else
821 print_metric(ctxp, NULL, NULL, name, 0); 872 print_metric(ctxp, NULL, NULL, name, 0);
822 } else if (evsel->metric_expr) { 873 } else if (evsel->metric_expr) {
823 struct parse_ctx pctx; 874 generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
824 int i; 875 evsel->metric_name, avg, cpu, out);
825
826 expr__ctx_init(&pctx);
827 expr__add_id(&pctx, evsel->name, avg);
828 for (i = 0; evsel->metric_events[i]; i++) {
829 struct saved_value *v;
830
831 v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false);
832 if (!v)
833 break;
834 expr__add_id(&pctx, evsel->metric_events[i]->name,
835 avg_stats(&v->stats));
836 }
837 if (!evsel->metric_events[i]) {
838 const char *p = evsel->metric_expr;
839
840 if (expr__parse(&ratio, &pctx, &p) == 0)
841 print_metric(ctxp, NULL, "%8.1f",
842 evsel->metric_name ?
843 evsel->metric_name :
844 out->force_header ? evsel->name : "",
845 ratio);
846 else
847 print_metric(ctxp, NULL, NULL, "", 0);
848 } else
849 print_metric(ctxp, NULL, NULL, "", 0);
850 } else if (runtime_nsecs_stats[cpu].n != 0) { 876 } else if (runtime_nsecs_stats[cpu].n != 0) {
851 char unit = 'M'; 877 char unit = 'M';
852 char unit_buf[10]; 878 char unit_buf[10];
@@ -864,6 +890,20 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
864 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { 890 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
865 print_smi_cost(cpu, evsel, out); 891 print_smi_cost(cpu, evsel, out);
866 } else { 892 } else {
867 print_metric(ctxp, NULL, NULL, NULL, 0); 893 num = 0;
868 } 894 }
895
896 if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
897 struct metric_expr *mexp;
898
899 list_for_each_entry (mexp, &me->head, nd) {
900 if (num++ > 0)
901 out->new_line(ctxp);
902 generic_metric(mexp->metric_expr, mexp->metric_events,
903 evsel->name, mexp->metric_name,
904 avg, cpu, out);
905 }
906 }
907 if (num == 0)
908 print_metric(ctxp, NULL, NULL, NULL, 0);
869} 909}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c9bae5fb8b47..151e9efd7286 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -70,7 +70,7 @@ double rel_stddev_stats(double stddev, double avg)
70bool __perf_evsel_stat__is(struct perf_evsel *evsel, 70bool __perf_evsel_stat__is(struct perf_evsel *evsel,
71 enum perf_stat_evsel_id id) 71 enum perf_stat_evsel_id id)
72{ 72{
73 struct perf_stat_evsel *ps = evsel->priv; 73 struct perf_stat_evsel *ps = evsel->stats;
74 74
75 return ps->id == id; 75 return ps->id == id;
76} 76}
@@ -94,7 +94,7 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
94 94
95void perf_stat_evsel_id_init(struct perf_evsel *evsel) 95void perf_stat_evsel_id_init(struct perf_evsel *evsel)
96{ 96{
97 struct perf_stat_evsel *ps = evsel->priv; 97 struct perf_stat_evsel *ps = evsel->stats;
98 int i; 98 int i;
99 99
100 /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ 100 /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
@@ -110,7 +110,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel)
110static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) 110static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
111{ 111{
112 int i; 112 int i;
113 struct perf_stat_evsel *ps = evsel->priv; 113 struct perf_stat_evsel *ps = evsel->stats;
114 114
115 for (i = 0; i < 3; i++) 115 for (i = 0; i < 3; i++)
116 init_stats(&ps->res_stats[i]); 116 init_stats(&ps->res_stats[i]);
@@ -120,8 +120,8 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
120 120
121static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) 121static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
122{ 122{
123 evsel->priv = zalloc(sizeof(struct perf_stat_evsel)); 123 evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
124 if (evsel->priv == NULL) 124 if (evsel->stats == NULL)
125 return -ENOMEM; 125 return -ENOMEM;
126 perf_evsel__reset_stat_priv(evsel); 126 perf_evsel__reset_stat_priv(evsel);
127 return 0; 127 return 0;
@@ -129,11 +129,11 @@ static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
129 129
130static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) 130static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
131{ 131{
132 struct perf_stat_evsel *ps = evsel->priv; 132 struct perf_stat_evsel *ps = evsel->stats;
133 133
134 if (ps) 134 if (ps)
135 free(ps->group_data); 135 free(ps->group_data);
136 zfree(&evsel->priv); 136 zfree(&evsel->stats);
137} 137}
138 138
139static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, 139static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
@@ -278,7 +278,9 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
278 perf_evsel__compute_deltas(evsel, cpu, thread, count); 278 perf_evsel__compute_deltas(evsel, cpu, thread, count);
279 perf_counts_values__scale(count, config->scale, NULL); 279 perf_counts_values__scale(count, config->scale, NULL);
280 if (config->aggr_mode == AGGR_NONE) 280 if (config->aggr_mode == AGGR_NONE)
281 perf_stat__update_shadow_stats(evsel, count->values, cpu); 281 perf_stat__update_shadow_stats(evsel, count->val, cpu);
282 if (config->aggr_mode == AGGR_THREAD)
283 perf_stat__update_shadow_stats(evsel, count->val, 0);
282 break; 284 break;
283 case AGGR_GLOBAL: 285 case AGGR_GLOBAL:
284 aggr->val += count->val; 286 aggr->val += count->val;
@@ -319,9 +321,8 @@ int perf_stat_process_counter(struct perf_stat_config *config,
319 struct perf_evsel *counter) 321 struct perf_evsel *counter)
320{ 322{
321 struct perf_counts_values *aggr = &counter->counts->aggr; 323 struct perf_counts_values *aggr = &counter->counts->aggr;
322 struct perf_stat_evsel *ps = counter->priv; 324 struct perf_stat_evsel *ps = counter->stats;
323 u64 *count = counter->counts->aggr.values; 325 u64 *count = counter->counts->aggr.values;
324 u64 val;
325 int i, ret; 326 int i, ret;
326 327
327 aggr->val = aggr->ena = aggr->run = 0; 328 aggr->val = aggr->ena = aggr->run = 0;
@@ -361,8 +362,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
361 /* 362 /*
362 * Save the full runtime - to allow normalization during printout: 363 * Save the full runtime - to allow normalization during printout:
363 */ 364 */
364 val = counter->scale * *count; 365 perf_stat__update_shadow_stats(counter, *count, 0);
365 perf_stat__update_shadow_stats(counter, &val, 0);
366 366
367 return 0; 367 return 0;
368} 368}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 96326b1f9443..eefca5c981fd 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -83,7 +83,7 @@ typedef void (*new_line_t )(void *ctx);
83 83
84void perf_stat__init_shadow_stats(void); 84void perf_stat__init_shadow_stats(void);
85void perf_stat__reset_shadow_stats(void); 85void perf_stat__reset_shadow_stats(void);
86void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, 86void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
87 int cpu); 87 int cpu);
88struct perf_stat_output_ctx { 88struct perf_stat_output_ctx {
89 void *ctx; 89 void *ctx;
@@ -92,9 +92,11 @@ struct perf_stat_output_ctx {
92 bool force_header; 92 bool force_header;
93}; 93};
94 94
95struct rblist;
95void perf_stat__print_shadow_stats(struct perf_evsel *evsel, 96void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
96 double avg, int cpu, 97 double avg, int cpu,
97 struct perf_stat_output_ctx *out); 98 struct perf_stat_output_ctx *out,
99 struct rblist *metric_events);
98void perf_stat__collect_metric_expr(struct perf_evlist *); 100void perf_stat__collect_metric_expr(struct perf_evlist *);
99 101
100int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); 102int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 6492ef38b090..1b67a8639dfe 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -46,6 +46,7 @@ struct symbol_conf symbol_conf = {
46 .show_hist_headers = true, 46 .show_hist_headers = true,
47 .symfs = "", 47 .symfs = "",
48 .event_group = true, 48 .event_group = true,
49 .inline_name = true,
49}; 50};
50 51
51static enum dso_binary_type binary_type_symtab[] = { 52static enum dso_binary_type binary_type_symtab[] = {
@@ -227,7 +228,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
227 struct maps *maps = &mg->maps[type]; 228 struct maps *maps = &mg->maps[type];
228 struct map *next, *curr; 229 struct map *next, *curr;
229 230
230 pthread_rwlock_wrlock(&maps->lock); 231 down_write(&maps->lock);
231 232
232 curr = maps__first(maps); 233 curr = maps__first(maps);
233 if (curr == NULL) 234 if (curr == NULL)
@@ -247,7 +248,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
247 curr->end = ~0ULL; 248 curr->end = ~0ULL;
248 249
249out_unlock: 250out_unlock:
250 pthread_rwlock_unlock(&maps->lock); 251 up_write(&maps->lock);
251} 252}
252 253
253struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) 254struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
@@ -1672,7 +1673,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
1672 struct maps *maps = &mg->maps[type]; 1673 struct maps *maps = &mg->maps[type];
1673 struct map *map; 1674 struct map *map;
1674 1675
1675 pthread_rwlock_rdlock(&maps->lock); 1676 down_read(&maps->lock);
1676 1677
1677 for (map = maps__first(maps); map; map = map__next(map)) { 1678 for (map = maps__first(maps); map; map = map__next(map)) {
1678 if (map->dso && strcmp(map->dso->short_name, name) == 0) 1679 if (map->dso && strcmp(map->dso->short_name, name) == 0)
@@ -1682,7 +1683,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
1682 map = NULL; 1683 map = NULL;
1683 1684
1684out_unlock: 1685out_unlock:
1685 pthread_rwlock_unlock(&maps->lock); 1686 up_read(&maps->lock);
1686 return map; 1687 return map;
1687} 1688}
1688 1689
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 6352022593c6..a4f0075b4e5c 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -60,6 +60,7 @@ struct symbol {
60 u8 binding; 60 u8 binding;
61 u8 idle:1; 61 u8 idle:1;
62 u8 ignore:1; 62 u8 ignore:1;
63 u8 inlined:1;
63 u8 arch_sym; 64 u8 arch_sym;
64 char name[0]; 65 char name[0];
65}; 66};
@@ -209,6 +210,7 @@ struct addr_location {
209 struct thread *thread; 210 struct thread *thread;
210 struct map *map; 211 struct map *map;
211 struct symbol *sym; 212 struct symbol *sym;
213 const char *srcline;
212 u64 addr; 214 u64 addr;
213 char level; 215 char level;
214 u8 filtered; 216 u8 filtered;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 1dbcd3c8dee0..68b65b10579b 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -46,6 +46,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
46 thread->cpu = -1; 46 thread->cpu = -1;
47 INIT_LIST_HEAD(&thread->namespaces_list); 47 INIT_LIST_HEAD(&thread->namespaces_list);
48 INIT_LIST_HEAD(&thread->comm_list); 48 INIT_LIST_HEAD(&thread->comm_list);
49 init_rwsem(&thread->namespaces_lock);
50 init_rwsem(&thread->comm_lock);
49 51
50 comm_str = malloc(32); 52 comm_str = malloc(32);
51 if (!comm_str) 53 if (!comm_str)
@@ -84,18 +86,26 @@ void thread__delete(struct thread *thread)
84 map_groups__put(thread->mg); 86 map_groups__put(thread->mg);
85 thread->mg = NULL; 87 thread->mg = NULL;
86 } 88 }
89 down_write(&thread->namespaces_lock);
87 list_for_each_entry_safe(namespaces, tmp_namespaces, 90 list_for_each_entry_safe(namespaces, tmp_namespaces,
88 &thread->namespaces_list, list) { 91 &thread->namespaces_list, list) {
89 list_del(&namespaces->list); 92 list_del(&namespaces->list);
90 namespaces__free(namespaces); 93 namespaces__free(namespaces);
91 } 94 }
95 up_write(&thread->namespaces_lock);
96
97 down_write(&thread->comm_lock);
92 list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) { 98 list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
93 list_del(&comm->list); 99 list_del(&comm->list);
94 comm__free(comm); 100 comm__free(comm);
95 } 101 }
102 up_write(&thread->comm_lock);
103
96 unwind__finish_access(thread); 104 unwind__finish_access(thread);
97 nsinfo__zput(thread->nsinfo); 105 nsinfo__zput(thread->nsinfo);
98 106
107 exit_rwsem(&thread->namespaces_lock);
108 exit_rwsem(&thread->comm_lock);
99 free(thread); 109 free(thread);
100} 110}
101 111
@@ -126,8 +136,8 @@ struct namespaces *thread__namespaces(const struct thread *thread)
126 return list_first_entry(&thread->namespaces_list, struct namespaces, list); 136 return list_first_entry(&thread->namespaces_list, struct namespaces, list);
127} 137}
128 138
129int thread__set_namespaces(struct thread *thread, u64 timestamp, 139static int __thread__set_namespaces(struct thread *thread, u64 timestamp,
130 struct namespaces_event *event) 140 struct namespaces_event *event)
131{ 141{
132 struct namespaces *new, *curr = thread__namespaces(thread); 142 struct namespaces *new, *curr = thread__namespaces(thread);
133 143
@@ -150,6 +160,17 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp,
150 return 0; 160 return 0;
151} 161}
152 162
163int thread__set_namespaces(struct thread *thread, u64 timestamp,
164 struct namespaces_event *event)
165{
166 int ret;
167
168 down_write(&thread->namespaces_lock);
169 ret = __thread__set_namespaces(thread, timestamp, event);
170 up_write(&thread->namespaces_lock);
171 return ret;
172}
173
153struct comm *thread__comm(const struct thread *thread) 174struct comm *thread__comm(const struct thread *thread)
154{ 175{
155 if (list_empty(&thread->comm_list)) 176 if (list_empty(&thread->comm_list))
@@ -171,8 +192,8 @@ struct comm *thread__exec_comm(const struct thread *thread)
171 return last; 192 return last;
172} 193}
173 194
174int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, 195static int ____thread__set_comm(struct thread *thread, const char *str,
175 bool exec) 196 u64 timestamp, bool exec)
176{ 197{
177 struct comm *new, *curr = thread__comm(thread); 198 struct comm *new, *curr = thread__comm(thread);
178 199
@@ -196,6 +217,17 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
196 return 0; 217 return 0;
197} 218}
198 219
220int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
221 bool exec)
222{
223 int ret;
224
225 down_write(&thread->comm_lock);
226 ret = ____thread__set_comm(thread, str, timestamp, exec);
227 up_write(&thread->comm_lock);
228 return ret;
229}
230
199int thread__set_comm_from_proc(struct thread *thread) 231int thread__set_comm_from_proc(struct thread *thread)
200{ 232{
201 char path[64]; 233 char path[64];
@@ -213,7 +245,7 @@ int thread__set_comm_from_proc(struct thread *thread)
213 return err; 245 return err;
214} 246}
215 247
216const char *thread__comm_str(const struct thread *thread) 248static const char *__thread__comm_str(const struct thread *thread)
217{ 249{
218 const struct comm *comm = thread__comm(thread); 250 const struct comm *comm = thread__comm(thread);
219 251
@@ -223,6 +255,17 @@ const char *thread__comm_str(const struct thread *thread)
223 return comm__str(comm); 255 return comm__str(comm);
224} 256}
225 257
258const char *thread__comm_str(const struct thread *thread)
259{
260 const char *str;
261
262 down_read((struct rw_semaphore *)&thread->comm_lock);
263 str = __thread__comm_str(thread);
264 up_read((struct rw_semaphore *)&thread->comm_lock);
265
266 return str;
267}
268
226/* CHECKME: it should probably better return the max comm len from its comm list */ 269/* CHECKME: it should probably better return the max comm len from its comm list */
227int thread__comm_len(struct thread *thread) 270int thread__comm_len(struct thread *thread)
228{ 271{
@@ -265,7 +308,7 @@ static int __thread__prepare_access(struct thread *thread)
265 struct maps *maps = &thread->mg->maps[i]; 308 struct maps *maps = &thread->mg->maps[i];
266 struct map *map; 309 struct map *map;
267 310
268 pthread_rwlock_rdlock(&maps->lock); 311 down_read(&maps->lock);
269 312
270 for (map = maps__first(maps); map; map = map__next(map)) { 313 for (map = maps__first(maps); map; map = map__next(map)) {
271 err = unwind__prepare_access(thread, map, &initialized); 314 err = unwind__prepare_access(thread, map, &initialized);
@@ -273,7 +316,7 @@ static int __thread__prepare_access(struct thread *thread)
273 break; 316 break;
274 } 317 }
275 318
276 pthread_rwlock_unlock(&maps->lock); 319 up_read(&maps->lock);
277 } 320 }
278 321
279 return err; 322 return err;
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index fdcea7c0cac1..40cfa36c022a 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -10,6 +10,7 @@
10#include "symbol.h" 10#include "symbol.h"
11#include <strlist.h> 11#include <strlist.h>
12#include <intlist.h> 12#include <intlist.h>
13#include "rwsem.h"
13 14
14struct thread_stack; 15struct thread_stack;
15struct unwind_libunwind_ops; 16struct unwind_libunwind_ops;
@@ -30,7 +31,9 @@ struct thread {
30 int comm_len; 31 int comm_len;
31 bool dead; /* if set thread has exited */ 32 bool dead; /* if set thread has exited */
32 struct list_head namespaces_list; 33 struct list_head namespaces_list;
34 struct rw_semaphore namespaces_lock;
33 struct list_head comm_list; 35 struct list_head comm_list;
36 struct rw_semaphore comm_lock;
34 u64 db_id; 37 u64 db_id;
35 38
36 void *priv; 39 void *priv;
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 506150a75bd0..9892323cdd7c 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -38,6 +38,7 @@ struct perf_top {
38 int sym_pcnt_filter; 38 int sym_pcnt_filter;
39 const char *sym_filter; 39 const char *sym_filter;
40 float min_percent; 40 float min_percent;
41 unsigned int nr_threads_synthesize;
41}; 42};
42 43
43#define CONSOLE_CLEAR "" 44#define CONSOLE_CLEAR ""
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index e7d60d05596d..d7f2113462fb 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -28,7 +28,6 @@
28#include <sys/types.h> 28#include <sys/types.h>
29#include <sys/stat.h> 29#include <sys/stat.h>
30#include <sys/wait.h> 30#include <sys/wait.h>
31#include <pthread.h>
32#include <fcntl.h> 31#include <fcntl.h>
33#include <unistd.h> 32#include <unistd.h>
34#include <errno.h> 33#include <errno.h>
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 8a9a677f7576..40b425949aa3 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -27,7 +27,6 @@
27#include <sys/stat.h> 27#include <sys/stat.h>
28#include <sys/wait.h> 28#include <sys/wait.h>
29#include <sys/mman.h> 29#include <sys/mman.h>
30#include <pthread.h>
31#include <fcntl.h> 30#include <fcntl.h>
32#include <unistd.h> 31#include <unistd.h>
33#include <errno.h> 32#include <errno.h>
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 3687b720327a..a789f952b3e9 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -7,6 +7,7 @@
7#include <sys/stat.h> 7#include <sys/stat.h>
8#include <sys/utsname.h> 8#include <sys/utsname.h>
9#include <dirent.h> 9#include <dirent.h>
10#include <fcntl.h>
10#include <inttypes.h> 11#include <inttypes.h>
11#include <signal.h> 12#include <signal.h>
12#include <stdio.h> 13#include <stdio.h>
@@ -23,6 +24,19 @@
23/* 24/*
24 * XXX We need to find a better place for these things... 25 * XXX We need to find a better place for these things...
25 */ 26 */
27
28bool perf_singlethreaded = true;
29
30void perf_set_singlethreaded(void)
31{
32 perf_singlethreaded = true;
33}
34
35void perf_set_multithreaded(void)
36{
37 perf_singlethreaded = false;
38}
39
26unsigned int page_size; 40unsigned int page_size;
27int cacheline_size; 41int cacheline_size;
28 42
@@ -175,7 +189,7 @@ out:
175 return err; 189 return err;
176} 190}
177 191
178int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) 192static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
179{ 193{
180 void *ptr; 194 void *ptr;
181 loff_t pgoff; 195 loff_t pgoff;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index b52765e6d7b4..01434509c2e9 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -6,7 +6,6 @@
6/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ 6/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
7#define _DEFAULT_SOURCE 1 7#define _DEFAULT_SOURCE 1
8 8
9#include <fcntl.h>
10#include <stdbool.h> 9#include <stdbool.h>
11#include <stddef.h> 10#include <stddef.h>
12#include <stdlib.h> 11#include <stdlib.h>
@@ -36,7 +35,6 @@ bool lsdir_no_dot_filter(const char *name, struct dirent *d);
36int copyfile(const char *from, const char *to); 35int copyfile(const char *from, const char *to);
37int copyfile_mode(const char *from, const char *to, mode_t mode); 36int copyfile_mode(const char *from, const char *to, mode_t mode);
38int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); 37int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
39int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
40 38
41ssize_t readn(int fd, void *buf, size_t n); 39ssize_t readn(int fd, void *buf, size_t n);
42ssize_t writen(int fd, const void *buf, size_t n); 40ssize_t writen(int fd, const void *buf, size_t n);
@@ -65,4 +63,9 @@ int sched_getcpu(void);
65int setns(int fd, int nstype); 63int setns(int fd, int nstype);
66#endif 64#endif
67 65
66extern bool perf_singlethreaded;
67
68void perf_set_singlethreaded(void);
69void perf_set_multithreaded(void);
70
68#endif /* GIT_COMPAT_UTIL_H */ 71#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index cffcda448c28..0acb1ec0e2f0 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -320,7 +320,7 @@ struct dso *machine__findnew_vdso(struct machine *machine,
320 struct vdso_info *vdso_info; 320 struct vdso_info *vdso_info;
321 struct dso *dso = NULL; 321 struct dso *dso = NULL;
322 322
323 pthread_rwlock_wrlock(&machine->dsos.lock); 323 down_write(&machine->dsos.lock);
324 if (!machine->vdso_info) 324 if (!machine->vdso_info)
325 machine->vdso_info = vdso_info__new(); 325 machine->vdso_info = vdso_info__new();
326 326
@@ -348,7 +348,7 @@ struct dso *machine__findnew_vdso(struct machine *machine,
348 348
349out_unlock: 349out_unlock:
350 dso__get(dso); 350 dso__get(dso);
351 pthread_rwlock_unlock(&machine->dsos.lock); 351 up_write(&machine->dsos.lock);
352 return dso; 352 return dso;
353} 353}
354 354
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
index 008fe68d7b76..a725b958cf31 100644
--- a/tools/perf/util/zlib.c
+++ b/tools/perf/util/zlib.c
@@ -1,4 +1,5 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <fcntl.h>
2#include <stdio.h> 3#include <stdio.h>
3#include <unistd.h> 4#include <unistd.h>
4#include <sys/stat.h> 5#include <sys/stat.h>