diff options
author | Adrian Hunter <adrian.hunter@intel.com> | 2015-07-17 12:33:42 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2015-08-17 10:11:37 -0400 |
commit | 5efb1d5489520ce72232bbc28e9156f0ebddc44e (patch) | |
tree | a5637c81f5d6576c848a6a3d1ef91790ab1e7b46 | |
parent | 90e457f7be0870052724b2d9c2c106e5847f2c19 (diff) |
perf tools: Take Intel PT into use
To record an AUX area, the weak function auxtrace_record__init() must be
implemented.
Equally, to decode an AUX area, the AUX area tracing type must be added
to the perf_event__process_auxtrace_info() function.
This patch makes those two changes plus hooks up default config for the
intel_pt PMU. Also some brief documentation is provided for using the
tools with intel_pt.
Committer note:
E.g:
[root@perf4 ~]# dmesg
451 [0.405807] Performance Events: PEBS fmt2+, 16-deep LBR, Broadwell events, full-width counters, Intel PMU driver.
[root@perf4 ~]# perf --version
perf version 4.1.g53874a
[root@perf4 ~]# perf record -e intel_pt//u -a sleep 10
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.383 MB perf.data ]
[root@perf4 ~]# perf evlist
intel_pt//u
sched:sched_switch
dummy:u
[root@perf4 ~]# perf report --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 0 of event 'intel_pt//u'
# Event count (approx.): 0
#
# Overhead Command Shared Object Symbol
# ........ ....... ............. ......
#
# Samples: 393 of event 'sched:sched_switch'
# Event count (approx.): 393
#
# Overhead Command Shared Object Symbol
# ........ .............. ................ ..............
49.62% swapper [kernel.vmlinux] [k] __schedule
10.69% rcu_sched [kernel.vmlinux] [k] __schedule
6.62% rcuos/0 [kernel.vmlinux] [k] __schedule
5.60% kworker/0:1 [kernel.vmlinux] [k] __schedule
3.56% rcuos/3 [kernel.vmlinux] [k] __schedule
3.05% kworker/u384:2 [kernel.vmlinux] [k] __schedule
2.54% kworker/2:0 [kernel.vmlinux] [k] __schedule
2.54% tuned [kernel.vmlinux] [k] __schedule
<SNIP>
# Samples: 0 of event 'dummy:u'
# Event count (approx.): 0
#
# Overhead Command Shared Object Symbol
# ........ ....... ............. ......
# Samples: 28 of event 'instructions:u'
# Event count (approx.): 5030172
#
# Overhead Command Shared Object Symbol
# ........ .......... ................... ................................
#
21.43% tuned libpython2.7.so.1.0 [.] PyEval_EvalFrameEx
|
---PyEval_EvalFrameEx
|
|--83.33%-- PyEval_EvalCodeEx
| PyEval_EvalFrameEx
| |
| |--60.00%-- PyEval_EvalCodeEx
| | PyEval_EvalFrameEx
| | PyEval_EvalFrameEx
| |
| --40.00%-- PyEval_EvalFrameEx
|
--16.67%-- PyEval_EvalFrameEx
PyEval_EvalCodeEx
PyEval_EvalFrameEx
PyEval_EvalCodeEx
PyEval_EvalFrameEx
PyEval_EvalFrameEx
14.29% tuned libpython2.7.so.1.0 [.] _PyType_Lookup
|
---_PyType_Lookup
_PyObject_GenericGetAttrWithDict
PyEval_EvalFrameEx
PyEval_EvalCodeEx
PyEval_EvalFrameEx
PyEval_EvalCodeEx
PyEval_EvalFrameEx
|
|--75.00%-- PyEval_EvalFrameEx
|
--25.00%-- PyEval_EvalCodeEx
PyEval_EvalFrameEx
PyEval_EvalFrameEx
3.57% irqbalance irqbalance [.] 0x0000000000004038
|
---0x4038
0x4761
0x4761
0x4761
0x49f1
0x2295
3.57% irqbalance libc-2.17.so [.] __GI_____strtoull_l_internal
|
---__GI_____strtoull_l_internal
0x6f49
0x229a
3.57% irqbalance libc-2.17.so [.] __strchrnul
|
---__strchrnul
vfprintf
__vsprintf_chk
__sprintf_chk
0x2724
0x4038
0x2331
3.57% irqbalance libc-2.17.so [.] __strstr_sse42
|
---__strstr_sse42
0x71e0
0x229f
# And now to some userspace ftrace on uninstrumented binaries 8-) :
# Hand edited to make it a bit more compact, replacing /home/acme/bin/perf
# with /bin/perf:
[root@perf4 ~]# perf script
perf 8921 [3] 7.310889: 1 branches:u: 0 [unknown] ([unknown]) => 7fcecadbf257 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310889: 1 branches:u: 7fcecadbf25f __GI___ioctl (/usr/lib64/libc-2.17.so) => 481689 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310889: 1 branches:u: 481694 perf_evlist__enable (/bin/perf) => 481614 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310889: 1 branches:u: 481630 perf_evlist__enable (/bin/perf) => 4816d8 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310889: 1 branches:u: 4816de perf_evlist__enable (/bin/perf) => 48164f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310889: 1 branches:u: 481652 perf_evlist__enable (/bin/perf) => 48165f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310889: 1 branches:u: 481684 perf_evlist__enable (/bin/perf) => 41d250 ioctl@plt (/bin/perf)
perf 8921 [3] 7.310889: 1 branches:u: 41d250 ioctl@plt (/bin/perf) => 7fcecadbf250 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310889: 1 branches:u: 7fcecadbf255 __GI___ioctl (/usr/lib64/libc-2.17.so) => 0 [unknown] ([unknown])
perf 8921 [3] 7.310890: 1 branches:u: 0 [unknown] ([unknown]) => 7fcecadbf257 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310890: 1 branches:u: 7fcecadbf25f __GI___ioctl (/usr/lib64/libc-2.17.so) => 481689 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310890: 1 branches:u: 481694 perf_evlist__enable (/bin/perf) => 481614 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310890: 1 branches:u: 481652 perf_evlist__enable (/bin/perf) => 48165f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310890: 1 branches:u: 481684 perf_evlist__enable (/bin/perf) => 41d250 ioctl@plt (/bin/perf)
perf 8921 [3] 7.310890: 1 branches:u: 41d250 ioctl@plt (/bin/perf) => 7fcecadbf250 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310890: 1 branches:u: 7fcecadbf255 __GI___ioctl (/usr/lib64/libc-2.17.so) => 0 [unknown] ([unknown])
perf 8921 [3] 7.310893: 1 branches:u: 0 [unknown] ([unknown]) => 7fcecadbf257 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310893: 1 branches:u: 7fcecadbf25f __GI___ioctl (/usr/lib64/libc-2.17.so) => 481689 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310893: 1 branches:u: 4816a8 perf_evlist__enable (/bin/perf) => 4815f8 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310893: 1 branches:u: 4815fe perf_evlist__enable (/bin/perf) => 481614 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310893: 1 branches:u: 481652 perf_evlist__enable (/bin/perf) => 48165f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310893: 1 branches:u: 481684 perf_evlist__enable (/bin/perf) => 41d250 ioctl@plt (/bin/perf)
perf 8921 [3] 7.310893: 1 branches:u: 41d250 ioctl@plt (/bin/perf) => 7fcecadbf250 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310893: 1 branches:u: 7fcecadbf255 __GI___ioctl (/usr/lib64/libc-2.17.so) => 0 [unknown] ([unknown])
perf 8921 [3] 7.310956: 1 branches:u: 0 [unknown] ([unknown]) => 7fcecadbf257 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310956: 1 branches:u: 7fcecadbf25f __GI___ioctl (/usr/lib64/libc-2.17.so) => 481689 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310956: 1 branches:u: 481694 perf_evlist__enable (/bin/perf) => 481614 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310956: 1 branches:u: 481630 perf_evlist__enable (/bin/perf) => 4816d8 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310956: 1 branches:u: 4816de perf_evlist__enable (/bin/perf) => 48164f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310956: 1 branches:u: 481652 perf_evlist__enable (/bin/perf) => 48165f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310956: 1 branches:u: 481684 perf_evlist__enable (/bin/perf) => 41d250 ioctl@plt (/bin/perf)
perf 8921 [3] 7.310956: 1 branches:u: 41d250 ioctl@plt (/bin/perf) => 7fcecadbf250 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310956: 1 branches:u: 7fcecadbf255 __GI___ioctl (/usr/lib64/libc-2.17.so) => 0 [unknown] ([unknown])
perf 8921 [3] 7.310961: 1 branches:u: 0 [unknown] ([unknown]) => 7fcecadbf257 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310961: 1 branches:u: 7fcecadbf25f __GI___ioctl (/usr/lib64/libc-2.17.so) => 481689 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310961: 1 branches:u: 481694 perf_evlist__enable (/bin/perf) => 481614 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310961: 1 branches:u: 481652 perf_evlist__enable (/bin/perf) => 48165f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310961: 1 branches:u: 481684 perf_evlist__enable (/bin/perf) => 41d250 ioctl@plt (/bin/perf)
perf 8921 [3] 7.310961: 1 branches:u: 41d250 ioctl@plt (/bin/perf) => 7fcecadbf250 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310961: 1 branches:u: 7fcecadbf255 __GI___ioctl (/usr/lib64/libc-2.17.so) => 0 [unknown] ([unknown])
perf 8921 [3] 7.310968: 1 branches:u: 0 [unknown] ([unknown]) => 7fcecadbf257 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310968: 1 branches:u: 7fcecadbf25f __GI___ioctl (/usr/lib64/libc-2.17.so) => 481689 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310968: 1 branches:u: 4816a8 perf_evlist__enable (/bin/perf) => 4815f8 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310968: 1 branches:u: 4815fe perf_evlist__enable (/bin/perf) => 481614 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310968: 1 branches:u: 481652 perf_evlist__enable (/bin/perf) => 48165f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.310968: 1 branches:u: 481684 perf_evlist__enable (/bin/perf) => 41d250 ioctl@plt (/bin/perf)
perf 8921 [3] 7.310968: 1 branches:u: 41d250 ioctl@plt (/bin/perf) => 7fcecadbf250 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.310968: 1 branches:u: 7fcecadbf255 __GI___ioctl (/usr/lib64/libc-2.17.so) => 0 [unknown] ([unknown])
perf 8921 [3] 7.311040: 1 branches:u: 0 [unknown] ([unknown]) => 7fcecadbf257 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.311040: 1 branches:u: 7fcecadbf25f __GI___ioctl (/usr/lib64/libc-2.17.so) => 481689 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.311040: 1 branches:u: 481694 perf_evlist__enable (/bin/perf) => 481614 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.311040: 1 branches:u: 481630 perf_evlist__enable (/bin/perf) => 4816d8 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.311040: 1 branches:u: 4816de perf_evlist__enable (/bin/perf) => 48164f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.311040: 1 branches:u: 481652 perf_evlist__enable (/bin/perf) => 48165f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.311040: 1 branches:u: 481684 perf_evlist__enable (/bin/perf) => 41d250 ioctl@plt (/bin/perf)
perf 8921 [3] 7.311040: 1 branches:u: 41d250 ioctl@plt (/bin/perf) => 7fcecadbf250 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.311040: 1 branches:u: 7fcecadbf255 __GI___ioctl (/usr/lib64/libc-2.17.so) => 0 [unknown] ([unknown])
perf 8921 [3] 7.311046: 1 branches:u: 0 [unknown] ([unknown]) => 7fcecadbf257 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.311046: 1 branches:u: 7fcecadbf25f __GI___ioctl (/usr/lib64/libc-2.17.so) => 481689 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.311046: 1 branches:u: 481694 perf_evlist__enable (/bin/perf) => 481614 perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.311046: 1 branches:u: 481652 perf_evlist__enable (/bin/perf) => 48165f perf_evlist__enable (/bin/perf)
perf 8921 [3] 7.311046: 1 branches:u: 481684 perf_evlist__enable (/bin/perf) => 41d250 ioctl@plt (/bin/perf)
perf 8921 [3] 7.311046: 1 branches:u: 41d250 ioctl@plt (/bin/perf) => 7fcecadbf250 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.311046: 1 branches:u: 7fcecadbf255 __GI___ioctl (/usr/lib64/libc-2.17.so) => 0 [unknown] ([unknown])
perf 8921 [3] 7.311050: 1 branches:u: 0 [unknown] ([unknown]) => 7fcecadbf257 __GI___ioctl (/usr/lib64/libc-2.17.so)
perf 8921 [3] 7.311050: 1 branches:u: 7fcecadbf25f __GI___ioctl (/usr/lib64/libc-2.17.so) => 481689 perf_evlist__enable (/bin/perf)
:
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1437150840-31811-8-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | tools/perf/Documentation/intel-pt.txt | 588 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/Build | 2 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/auxtrace.c | 38 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/pmu.c | 15 | ||||
-rw-r--r-- | tools/perf/util/auxtrace.c | 5 | ||||
-rw-r--r-- | tools/perf/util/pmu.c | 4 |
6 files changed, 649 insertions, 3 deletions
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt new file mode 100644 index 000000000000..2866b62eb293 --- /dev/null +++ b/tools/perf/Documentation/intel-pt.txt | |||
@@ -0,0 +1,588 @@ | |||
1 | Intel Processor Trace | ||
2 | ===================== | ||
3 | |||
4 | Overview | ||
5 | ======== | ||
6 | |||
7 | Intel Processor Trace (Intel PT) is an extension of Intel Architecture that | ||
8 | collects information about software execution such as control flow, execution | ||
9 | modes and timings and formats it into highly compressed binary packets. | ||
10 | Technical details are documented in the Intel 64 and IA-32 Architectures | ||
11 | Software Developer Manuals, Chapter 36 Intel Processor Trace. | ||
12 | |||
13 | Intel PT is first supported in Intel Core M and 5th generation Intel Core | ||
14 | processors that are based on the Intel micro-architecture code name Broadwell. | ||
15 | |||
16 | Trace data is collected by 'perf record' and stored within the perf.data file. | ||
17 | See below for options to 'perf record'. | ||
18 | |||
19 | Trace data must be 'decoded' which involves walking the object code and matching | ||
20 | the trace data packets. For example a TNT packet only tells whether a | ||
21 | conditional branch was taken or not taken, so to make use of that packet the | ||
22 | decoder must know precisely which instruction was being executed. | ||
23 | |||
24 | Decoding is done on-the-fly. The decoder outputs samples in the same format as | ||
25 | samples output by perf hardware events, for example as though the "instructions" | ||
26 | or "branches" events had been recorded. Presently 3 tools support this: | ||
27 | 'perf script', 'perf report' and 'perf inject'. See below for more information | ||
28 | on using those tools. | ||
29 | |||
30 | The main distinguishing feature of Intel PT is that the decoder can determine | ||
31 | the exact flow of software execution. Intel PT can be used to understand why | ||
32 | and how software got to a certain point, or behaved a certain way. The | ||
33 | software does not have to be recompiled, so Intel PT works with debug or release | ||
34 | builds, however the executed images are needed - which makes use in JIT-compiled | ||
35 | environments, or with self-modified code, a challenge. Also symbols need to be | ||
36 | provided to make sense of addresses. | ||
37 | |||
38 | A limitation of Intel PT is that it produces huge amounts of trace data | ||
39 | (hundreds of megabytes per second per core) which takes a long time to decode, | ||
40 | for example two or three orders of magnitude longer than it took to collect. | ||
41 | Another limitation is the performance impact of tracing, something that will | ||
42 | vary depending on the use-case and architecture. | ||
43 | |||
44 | |||
45 | Quickstart | ||
46 | ========== | ||
47 | |||
48 | It is important to start small. That is because it is easy to capture vastly | ||
49 | more data than can possibly be processed. | ||
50 | |||
51 | The simplest thing to do with Intel PT is userspace profiling of small programs. | ||
52 | Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: | ||
53 | |||
54 | perf record -e intel_pt//u ls | ||
55 | |||
56 | And profiled with 'perf report' e.g. | ||
57 | |||
58 | perf report | ||
59 | |||
60 | To also trace kernel space presents a problem, namely kernel self-modifying | ||
61 | code. A fairly good kernel image is available in /proc/kcore but to get an | ||
62 | accurate image a copy of /proc/kcore needs to be made under the same conditions | ||
63 | as the data capture. A script perf-with-kcore can do that, but beware that the | ||
64 | script makes use of 'sudo' to copy /proc/kcore. If you have perf installed | ||
65 | locally from the source tree you can do: | ||
66 | |||
67 | ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls | ||
68 | |||
69 | which will create a directory named 'pt_ls' and put the perf.data file and | ||
70 | copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use | ||
71 | 'perf report' becomes: | ||
72 | |||
73 | ~/libexec/perf-core/perf-with-kcore report pt_ls | ||
74 | |||
75 | Because samples are synthesized after-the-fact, the sampling period can be | ||
76 | selected for reporting. e.g. sample every microsecond | ||
77 | |||
78 | ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge | ||
79 | |||
80 | See the sections below for more information about the --itrace option. | ||
81 | |||
82 | Beware: the smaller the period, the more samples are produced, and the | ||
83 | longer it takes to process them. | ||
84 | |||
85 | Also note that the coarseness of Intel PT timing information will start to | ||
86 | distort the statistical value of the sampling as the sampling period becomes | ||
87 | smaller. | ||
88 | |||
89 | To represent software control flow, "branches" samples are produced. By default | ||
90 | a branch sample is synthesized for every single branch. To get an idea what | ||
91 | data is available you can use the 'perf script' tool with no parameters, which | ||
92 | will list all the samples. | ||
93 | |||
94 | perf record -e intel_pt//u ls | ||
95 | perf script | ||
96 | |||
97 | An interesting field that is not printed by default is 'flags' which can be | ||
98 | displayed as follows: | ||
99 | |||
100 | perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags | ||
101 | |||
102 | The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, | ||
103 | system, asynchronous, interrupt, transaction abort, trace begin, trace end, and | ||
104 | in transaction, respectively. | ||
105 | |||
106 | While it is possible to create scripts to analyze the data, an alternative | ||
107 | approach is available to export the data to a postgresql database. Refer to | ||
108 | script export-to-postgresql.py for more details, and to script | ||
109 | call-graph-from-postgresql.py for an example of using the database. | ||
110 | |||
111 | As mentioned above, it is easy to capture too much data. One way to limit the | ||
112 | data captured is to use 'snapshot' mode which is explained further below. | ||
113 | Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. | ||
114 | |||
115 | Another problem that will be experienced is decoder errors. They can be caused | ||
116 | by inability to access the executed image, self-modified or JIT-ed code, or the | ||
117 | inability to match side-band information (such as context switches and mmaps) | ||
118 | which results in the decoder not knowing what code was executed. | ||
119 | |||
120 | There is also the problem of perf not being able to copy the data fast enough, | ||
121 | resulting in data being lost because the buffer was full. See 'Buffer handling' below | ||
122 | for more details. | ||
123 | |||
124 | |||
125 | perf record | ||
126 | =========== | ||
127 | |||
128 | new event | ||
129 | --------- | ||
130 | |||
131 | The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are | ||
132 | selected by providing the PMU name followed by the "config" separated by slashes. | ||
133 | An enhancement has been made to allow default "config" e.g. the option | ||
134 | |||
135 | -e intel_pt// | ||
136 | |||
137 | will use a default config value. Currently that is the same as | ||
138 | |||
139 | -e intel_pt/tsc,noretcomp=0/ | ||
140 | |||
141 | which is the same as | ||
142 | |||
143 | -e intel_pt/tsc=1,noretcomp=0/ | ||
144 | |||
145 | The config terms are listed in /sys/devices/intel_pt/format. They are bit | ||
146 | fields within the config member of the struct perf_event_attr which is | ||
147 | passed to the kernel by the perf_event_open system call. They correspond to bit | ||
148 | fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: | ||
149 | |||
150 | $ for f in `ls /sys/devices/intel_pt/format`;do | ||
151 | > echo $f | ||
152 | > cat /sys/devices/intel_pt/format/$f | ||
153 | > done | ||
154 | noretcomp | ||
155 | config:11 | ||
156 | tsc | ||
157 | config:10 | ||
158 | |||
159 | Note that the default config must be overridden for each term i.e. | ||
160 | |||
161 | -e intel_pt/noretcomp=0/ | ||
162 | |||
163 | is the same as: | ||
164 | |||
165 | -e intel_pt/tsc=1,noretcomp=0/ | ||
166 | |||
167 | So, to disable TSC packets use: | ||
168 | |||
169 | -e intel_pt/tsc=0/ | ||
170 | |||
171 | It is also possible to specify the config value explicitly: | ||
172 | |||
173 | -e intel_pt/config=0x400/ | ||
174 | |||
175 | Note that, as with all events, the event is suffixed with event modifiers: | ||
176 | |||
177 | u userspace | ||
178 | k kernel | ||
179 | h hypervisor | ||
180 | G guest | ||
181 | H host | ||
182 | p precise ip | ||
183 | |||
184 | 'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. | ||
185 | 'p' is also not relevant to Intel PT. So only options 'u' and 'k' are | ||
186 | meaningful for Intel PT. | ||
187 | |||
188 | perf_event_attr is displayed if the -vv option is used e.g. | ||
189 | |||
190 | ------------------------------------------------------------ | ||
191 | perf_event_attr: | ||
192 | type 6 | ||
193 | size 112 | ||
194 | config 0x400 | ||
195 | { sample_period, sample_freq } 1 | ||
196 | sample_type IP|TID|TIME|CPU|IDENTIFIER | ||
197 | read_format ID | ||
198 | disabled 1 | ||
199 | inherit 1 | ||
200 | exclude_kernel 1 | ||
201 | exclude_hv 1 | ||
202 | enable_on_exec 1 | ||
203 | sample_id_all 1 | ||
204 | ------------------------------------------------------------ | ||
205 | sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 | ||
206 | sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 | ||
207 | sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 | ||
208 | sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 | ||
209 | ------------------------------------------------------------ | ||
210 | |||
211 | |||
212 | new snapshot option | ||
213 | ------------------- | ||
214 | |||
215 | To select snapshot mode a new option has been added: | ||
216 | |||
217 | -S | ||
218 | |||
219 | Optionally it can be followed by the snapshot size e.g. | ||
220 | |||
221 | -S0x100000 | ||
222 | |||
223 | The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size | ||
224 | nor snapshot size is specified, then the default is 4MiB for privileged users | ||
225 | (or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. | ||
226 | If an unprivileged user does not specify mmap pages, the mmap pages will be | ||
227 | reduced as described in the 'new auxtrace mmap size option' section below. | ||
228 | |||
229 | The snapshot size is displayed if the option -vv is used e.g. | ||
230 | |||
231 | Intel PT snapshot size: %zu | ||
232 | |||
233 | |||
234 | new auxtrace mmap size option | ||
235 | ----------------------------- | ||
236 | |||
237 | Intel PT buffer size is specified by an addition to the -m option e.g. | ||
238 | |||
239 | -m,16 | ||
240 | |||
241 | selects a buffer size of 16 pages i.e. 64KiB. | ||
242 | |||
243 | Note that the existing functionality of -m is unchanged. The auxtrace mmap size | ||
244 | is specified by the optional addition of a comma and the value. | ||
245 | |||
246 | The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users | ||
247 | (or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. | ||
248 | If an unprivileged user does not specify mmap pages, the mmap pages will be | ||
249 | reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the | ||
250 | user is likely to get an error as they exceed their mlock limit (Max locked | ||
251 | memory as shown in /proc/self/limits). Note that perf does not count the first | ||
252 | 512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu | ||
253 | against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus | ||
254 | their mlock limit (which defaults to 64KiB but is not multiplied by the number | ||
255 | of cpus). | ||
256 | |||
257 | In full-trace mode, powers of two are allowed for buffer size, with a minimum | ||
258 | size of 2 pages. In snapshot mode, it is the same but the minimum size is | ||
259 | 1 page. | ||
260 | |||
261 | The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. | ||
262 | |||
263 | mmap length 528384 | ||
264 | auxtrace mmap length 4198400 | ||
265 | |||
266 | |||
267 | Intel PT modes of operation | ||
268 | --------------------------- | ||
269 | |||
270 | Intel PT can be used in 2 modes: | ||
271 | full-trace mode | ||
272 | snapshot mode | ||
273 | |||
274 | Full-trace mode traces continuously e.g. | ||
275 | |||
276 | perf record -e intel_pt//u uname | ||
277 | |||
278 | Snapshot mode captures the available data when a signal is sent e.g. | ||
279 | |||
280 | perf record -v -e intel_pt//u -S ./loopy 1000000000 & | ||
281 | [1] 11435 | ||
282 | kill -USR2 11435 | ||
283 | Recording AUX area tracing snapshot | ||
284 | |||
285 | Note that the signal sent is SIGUSR2. | ||
286 | Note that "Recording AUX area tracing snapshot" is displayed because the -v | ||
287 | option is used. | ||
288 | |||
289 | The 2 modes cannot be used together. | ||
290 | |||
291 | |||
292 | Buffer handling | ||
293 | --------------- | ||
294 | |||
295 | There may be buffer limitations (i.e. a single ToPA entry) which mean that actual | ||
296 | buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to | ||
297 | provide other sizes, and in particular an arbitrarily large size, multiple | ||
298 | buffers are logically concatenated. However an interrupt must be used to switch | ||
299 | between buffers. That has two potential problems: | ||
300 | a) the interrupt may not be handled in time so that the current buffer | ||
301 | becomes full and some trace data is lost. | ||
302 | b) the interrupts may slow the system and affect the performance | ||
303 | results. | ||
304 | |||
305 | If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event | ||
306 | which the tools report as an error. | ||
307 | |||
308 | In full-trace mode, the driver waits for data to be copied out before allowing | ||
309 | the (logical) buffer to wrap-around. If data is not copied out quickly enough, | ||
310 | again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to | ||
311 | wait, the intel_pt event gets disabled. Because it is difficult to know when | ||
312 | that happens, perf tools always re-enable the intel_pt event after copying out | ||
313 | data. | ||
314 | |||
315 | |||
316 | Intel PT and build ids | ||
317 | ---------------------- | ||
318 | |||
319 | By default "perf record" post-processes the event stream to find all build ids | ||
320 | for executables for all addresses sampled. Deliberately, Intel PT is not | ||
321 | decoded for that purpose (it would take too long). Instead the build ids for | ||
322 | all executables encountered (due to mmap, comm or task events) are included | ||
323 | in the perf.data file. | ||
324 | |||
325 | To see buildids included in the perf.data file use the command: | ||
326 | |||
327 | perf buildid-list | ||
328 | |||
329 | If the perf.data file contains Intel PT data, that is the same as: | ||
330 | |||
331 | perf buildid-list --with-hits | ||
332 | |||
333 | |||
334 | Snapshot mode and event disabling | ||
335 | --------------------------------- | ||
336 | |||
337 | In order to make a snapshot, the intel_pt event is disabled using an IOCTL, | ||
338 | namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the | ||
339 | collection of side-band information. In order to prevent that, a dummy | ||
340 | software event has been introduced that permits tracking events (like mmaps) to | ||
341 | continue to be recorded while intel_pt is disabled. That is important to ensure | ||
342 | there is complete side-band information to allow the decoding of subsequent | ||
343 | snapshots. | ||
344 | |||
345 | A test has been created for that. To find the test: | ||
346 | |||
347 | perf test list | ||
348 | ... | ||
349 | 23: Test using a dummy software event to keep tracking | ||
350 | |||
351 | To run the test: | ||
352 | |||
353 | perf test 23 | ||
354 | 23: Test using a dummy software event to keep tracking : Ok | ||
355 | |||
356 | |||
357 | perf record modes (nothing new here) | ||
358 | ------------------------------------ | ||
359 | |||
360 | perf record essentially operates in one of three modes: | ||
361 | per thread | ||
362 | per cpu | ||
363 | workload only | ||
364 | |||
365 | "per thread" mode is selected by -t or by --per-thread (with -p or -u or just a | ||
366 | workload). | ||
367 | "per cpu" is selected by -C or -a. | ||
368 | "workload only" mode is selected by not using the other options but providing a | ||
369 | command to run (i.e. the workload). | ||
370 | |||
371 | In per-thread mode an exact list of threads is traced. There is no inheritance. | ||
372 | Each thread has its own event buffer. | ||
373 | |||
374 | In per-cpu mode all processes (or processes from the selected cgroup i.e. -G | ||
375 | option, or processes selected with -p or -u) are traced. Each cpu has its own | ||
376 | buffer. Inheritance is allowed. | ||
377 | |||
378 | In workload-only mode, the workload is traced but with per-cpu buffers. | ||
379 | Inheritance is allowed. Note that you can now trace a workload in per-thread | ||
380 | mode by using the --per-thread option. | ||
381 | |||
382 | |||
383 | Privileged vs non-privileged users | ||
384 | ---------------------------------- | ||
385 | |||
386 | Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users | ||
387 | have memory limits imposed upon them. That affects what buffer sizes they can | ||
388 | have as outlined above. | ||
389 | |||
390 | Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are | ||
391 | not permitted to use tracepoints which means there is insufficient side-band | ||
392 | information to decode Intel PT in per-cpu mode, and potentially workload-only | ||
393 | mode too if the workload creates new processes. | ||
394 | |||
395 | Note also, that to use tracepoints, read-access to debugfs is required. So if | ||
396 | debugfs is not mounted or the user does not have read-access, it will again not | ||
397 | be possible to decode Intel PT in per-cpu mode. | ||
398 | |||
399 | |||
400 | sched_switch tracepoint | ||
401 | ----------------------- | ||
402 | |||
403 | The sched_switch tracepoint is used to provide side-band data for Intel PT | ||
404 | decoding. sched_switch events are automatically added. e.g. the second event | ||
405 | shown below | ||
406 | |||
407 | $ perf record -vv -e intel_pt//u uname | ||
408 | ------------------------------------------------------------ | ||
409 | perf_event_attr: | ||
410 | type 6 | ||
411 | size 112 | ||
412 | config 0x400 | ||
413 | { sample_period, sample_freq } 1 | ||
414 | sample_type IP|TID|TIME|CPU|IDENTIFIER | ||
415 | read_format ID | ||
416 | disabled 1 | ||
417 | inherit 1 | ||
418 | exclude_kernel 1 | ||
419 | exclude_hv 1 | ||
420 | enable_on_exec 1 | ||
421 | sample_id_all 1 | ||
422 | ------------------------------------------------------------ | ||
423 | sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 | ||
424 | sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 | ||
425 | sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 | ||
426 | sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 | ||
427 | ------------------------------------------------------------ | ||
428 | perf_event_attr: | ||
429 | type 2 | ||
430 | size 112 | ||
431 | config 0x108 | ||
432 | { sample_period, sample_freq } 1 | ||
433 | sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER | ||
434 | read_format ID | ||
435 | inherit 1 | ||
436 | sample_id_all 1 | ||
437 | exclude_guest 1 | ||
438 | ------------------------------------------------------------ | ||
439 | sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 | ||
440 | sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 | ||
441 | sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 | ||
442 | sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 | ||
443 | ------------------------------------------------------------ | ||
444 | perf_event_attr: | ||
445 | type 1 | ||
446 | size 112 | ||
447 | config 0x9 | ||
448 | { sample_period, sample_freq } 1 | ||
449 | sample_type IP|TID|TIME|IDENTIFIER | ||
450 | read_format ID | ||
451 | disabled 1 | ||
452 | inherit 1 | ||
453 | exclude_kernel 1 | ||
454 | exclude_hv 1 | ||
455 | mmap 1 | ||
456 | comm 1 | ||
457 | enable_on_exec 1 | ||
458 | task 1 | ||
459 | sample_id_all 1 | ||
460 | mmap2 1 | ||
461 | comm_exec 1 | ||
462 | ------------------------------------------------------------ | ||
463 | sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 | ||
464 | sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 | ||
465 | sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 | ||
466 | sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 | ||
467 | mmap size 528384B | ||
468 | AUX area mmap length 4194304 | ||
469 | perf event ring buffer mmapped per cpu | ||
470 | Synthesizing auxtrace information | ||
471 | Linux | ||
472 | [ perf record: Woken up 1 times to write data ] | ||
473 | [ perf record: Captured and wrote 0.042 MB perf.data ] | ||
474 | |||
475 | Note, the sched_switch event is only added if the user is permitted to use it | ||
476 | and only in per-cpu mode. | ||
477 | |||
478 | Note also, the sched_switch event is only added if TSC packets are requested. | ||
479 | That is because, in the absence of timing information, the sched_switch events | ||
480 | cannot be matched against the Intel PT trace. | ||
481 | |||
482 | |||
483 | perf script | ||
484 | =========== | ||
485 | |||
486 | By default, perf script will decode trace data found in the perf.data file. | ||
487 | This can be further controlled by the new option --itrace. | ||
488 | |||
489 | |||
490 | New --itrace option | ||
491 | ------------------- | ||
492 | |||
493 | Having no option is the same as | ||
494 | |||
495 | --itrace | ||
496 | |||
497 | which, in turn, is the same as | ||
498 | |||
499 | --itrace=ibxe | ||
500 | |||
501 | The letters are: | ||
502 | |||
503 | i synthesize "instructions" events | ||
504 | b synthesize "branches" events | ||
505 | x synthesize "transactions" events | ||
506 | c synthesize branches events (calls only) | ||
507 | r synthesize branches events (returns only) | ||
508 | e synthesize tracing error events | ||
509 | d create a debug log | ||
510 | g synthesize a call chain (use with i or x) | ||
511 | |||
512 | "Instructions" events look like they were recorded by "perf record -e | ||
513 | instructions". | ||
514 | |||
515 | "Branches" events look like they were recorded by "perf record -e branches". "c" | ||
516 | and "r" can be combined to get calls and returns. | ||
517 | |||
518 | "Transactions" events correspond to the start or end of transactions. The | ||
519 | 'flags' field can be used in perf script to determine whether the event is a | ||
520 | transaction start, commit or abort. | ||
521 | |||
522 | Error events are new. They show where the decoder lost the trace. Error events | ||
523 | are quite important. Users must know if what they are seeing is a complete | ||
524 | picture or not. | ||
525 | |||
526 | The "d" option will cause the creation of a file "intel_pt.log" containing all | ||
527 | decoded packets and instructions. Note that this option slows down the decoder | ||
528 | and that the resulting file may be very large. | ||
529 | |||
530 | In addition, the period of the "instructions" event can be specified. e.g. | ||
531 | |||
532 | --itrace=i10us | ||
533 | |||
534 | sets the period to 10us i.e. one instruction sample is synthesized for each 10 | ||
535 | microseconds of trace. Alternatives to "us" are "ms" (milliseconds), | ||
536 | "ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). | ||
537 | |||
538 | "ms", "us" and "ns" are converted to TSC ticks. | ||
539 | |||
540 | The timing information included with Intel PT does not give the time of every | ||
541 | instruction. Consequently, for the purpose of sampling, the decoder estimates | ||
542 | the time since the last timing packet based on 1 tick per instruction. The time | ||
543 | on the sample is *not* adjusted and reflects the last known value of TSC. | ||
544 | |||
545 | For Intel PT, the default period is 100us. | ||
546 | |||
547 | Also the call chain size (default 16, max. 1024) for instructions or | ||
548 | transactions events can be specified. e.g. | ||
549 | |||
550 | --itrace=ig32 | ||
551 | --itrace=xg32 | ||
552 | |||
553 | To disable trace decoding entirely, use the option --no-itrace. | ||
554 | |||
555 | |||
556 | dump option | ||
557 | ----------- | ||
558 | |||
559 | perf script has an option (-D) to "dump" the events i.e. display the binary | ||
560 | data. | ||
561 | |||
562 | When -D is used, Intel PT packets are displayed. The packet decoder does not | ||
563 | pay attention to PSB packets, but just decodes the bytes - so the packets seen | ||
564 | by the actual decoder may not be identical in places where the data is corrupt. | ||
565 | One example of that would be when the buffer-switching interrupt has been too | ||
566 | slow, and the buffer has been filled completely. In that case, the last packet | ||
567 | in the buffer might be truncated and immediately followed by a PSB as the trace | ||
568 | continues in the next buffer. | ||
569 | |||
570 | To disable the display of Intel PT packets, combine the -D option with | ||
571 | --no-itrace. | ||
572 | |||
573 | |||
574 | perf report | ||
575 | =========== | ||
576 | |||
577 | By default, perf report will decode trace data found in the perf.data file. | ||
578 | This can be further controlled by the new option --itrace, exactly the same as | ||
579 | perf script, with the exception that the default is --itrace=igxe. | ||
580 | |||
581 | |||
582 | perf inject | ||
583 | =========== | ||
584 | |||
585 | perf inject also accepts the --itrace option in which case tracing data is | ||
586 | removed and replaced with the synthesized events. e.g. | ||
587 | |||
588 | perf inject --itrace -i perf.data -o perf.data.new | ||
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 139608878888..a8be9f9d0462 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build | |||
@@ -1,5 +1,6 @@ | |||
1 | libperf-y += header.o | 1 | libperf-y += header.o |
2 | libperf-y += tsc.o | 2 | libperf-y += tsc.o |
3 | libperf-y += pmu.o | ||
3 | libperf-y += kvm-stat.o | 4 | libperf-y += kvm-stat.o |
4 | 5 | ||
5 | libperf-$(CONFIG_DWARF) += dwarf-regs.o | 6 | libperf-$(CONFIG_DWARF) += dwarf-regs.o |
@@ -7,4 +8,5 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o | |||
7 | libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o | 8 | libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o |
8 | libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o | 9 | libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o |
9 | 10 | ||
11 | libperf-$(CONFIG_AUXTRACE) += auxtrace.o | ||
10 | libperf-$(CONFIG_AUXTRACE) += intel-pt.o | 12 | libperf-$(CONFIG_AUXTRACE) += intel-pt.o |
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c new file mode 100644 index 000000000000..e7654b506312 --- /dev/null +++ b/tools/perf/arch/x86/util/auxtrace.c | |||
@@ -0,0 +1,38 @@ | |||
1 | /* | ||
2 | * auxtrace.c: AUX area tracing support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include "../../util/header.h" | ||
17 | #include "../../util/auxtrace.h" | ||
18 | #include "../../util/intel-pt.h" | ||
19 | |||
20 | struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, | ||
21 | int *err) | ||
22 | { | ||
23 | char buffer[64]; | ||
24 | int ret; | ||
25 | |||
26 | *err = 0; | ||
27 | |||
28 | ret = get_cpuid(buffer, sizeof(buffer)); | ||
29 | if (ret) { | ||
30 | *err = ret; | ||
31 | return NULL; | ||
32 | } | ||
33 | |||
34 | if (!strncmp(buffer, "GenuineIntel,", 13)) | ||
35 | return intel_pt_recording_init(err); | ||
36 | |||
37 | return NULL; | ||
38 | } | ||
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c new file mode 100644 index 000000000000..fd11cc3ce780 --- /dev/null +++ b/tools/perf/arch/x86/util/pmu.c | |||
@@ -0,0 +1,15 @@ | |||
1 | #include <string.h> | ||
2 | |||
3 | #include <linux/perf_event.h> | ||
4 | |||
5 | #include "../../util/intel-pt.h" | ||
6 | #include "../../util/pmu.h" | ||
7 | |||
8 | struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) | ||
9 | { | ||
10 | #ifdef HAVE_AUXTRACE_SUPPORT | ||
11 | if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) | ||
12 | return intel_pt_pmu_default_config(pmu); | ||
13 | #endif | ||
14 | return NULL; | ||
15 | } | ||
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 49dbfbeedb68..0f0b7e11e2d9 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c | |||
@@ -47,6 +47,8 @@ | |||
47 | #include "debug.h" | 47 | #include "debug.h" |
48 | #include "parse-options.h" | 48 | #include "parse-options.h" |
49 | 49 | ||
50 | #include "intel-pt.h" | ||
51 | |||
50 | int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, | 52 | int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, |
51 | struct auxtrace_mmap_params *mp, | 53 | struct auxtrace_mmap_params *mp, |
52 | void *userpg, int fd) | 54 | void *userpg, int fd) |
@@ -876,7 +878,7 @@ static bool auxtrace__dont_decode(struct perf_session *session) | |||
876 | 878 | ||
877 | int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, | 879 | int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, |
878 | union perf_event *event, | 880 | union perf_event *event, |
879 | struct perf_session *session __maybe_unused) | 881 | struct perf_session *session) |
880 | { | 882 | { |
881 | enum auxtrace_type type = event->auxtrace_info.type; | 883 | enum auxtrace_type type = event->auxtrace_info.type; |
882 | 884 | ||
@@ -885,6 +887,7 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, | |||
885 | 887 | ||
886 | switch (type) { | 888 | switch (type) { |
887 | case PERF_AUXTRACE_INTEL_PT: | 889 | case PERF_AUXTRACE_INTEL_PT: |
890 | return intel_pt_process_auxtrace_info(event, session); | ||
888 | case PERF_AUXTRACE_UNKNOWN: | 891 | case PERF_AUXTRACE_UNKNOWN: |
889 | default: | 892 | default: |
890 | return -EINVAL; | 893 | return -EINVAL; |
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 84cad054d6f7..3c71138e7672 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c | |||
@@ -462,8 +462,8 @@ static struct perf_pmu *pmu_lookup(const char *name) | |||
462 | LIST_HEAD(aliases); | 462 | LIST_HEAD(aliases); |
463 | __u32 type; | 463 | __u32 type; |
464 | 464 | ||
465 | /* No support for intel_bts or intel_pt so disallow them */ | 465 | /* No support for intel_bts so disallow it */ |
466 | if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt")) | 466 | if (!strcmp(name, "intel_bts")) |
467 | return NULL; | 467 | return NULL; |
468 | 468 | ||
469 | /* | 469 | /* |