aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-08-22 02:45:46 -0400
committerIngo Molnar <mingo@kernel.org>2015-08-22 02:45:46 -0400
commit0e53909a1cf0153736fb52c216558a65530d8c40 (patch)
tree3073fe02fc1398d6a60bbba4399df84d57cc9741 /tools
parent82819ffb42fb45197bacf3223191deca31d3eb91 (diff)
parent1c0bd0e891aaed0219010bfe79b32e1b0b82d662 (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: User visible changes: - Fix segfault using 'perf script --show-mmap-events', affects only current perf/core. (Adrian Hunter) - /proc/kcore requires CAP_SYS_RAWIO message too noisy, make it debug only. (Adrian Hunter) - Fix Intel PT timestamp handling. (Adrian Hunter) - Add Intel BTS support, with a call-graph script to show it and PT in use in a GUI using 'perf script' python scripting with postgresql and Qt. (Adrian Hunter) - Add checks for returned EVENT_ERROR type in libtraceevent, fixing a bug that surfaced on arm64 systems. (Dean Nelson) - Fallback to using kallsyms when libdw fails to handle a vmlinux file, that can happen, for instance, when perf is statically linked and then libdw fails to load libebl_{arch}.so. (Wang Nan) Infrastructure changes: - Initialize reference counts in map__clone(). (Arnaldo Carvalho de Melo) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/lib/traceevent/event-parse.c9
-rw-r--r--tools/perf/Documentation/intel-bts.txt86
-rw-r--r--tools/perf/Documentation/itrace.txt22
-rw-r--r--tools/perf/Documentation/perf-inject.txt23
-rw-r--r--tools/perf/Documentation/perf-report.txt23
-rw-r--r--tools/perf/Documentation/perf-script.txt23
-rw-r--r--tools/perf/arch/x86/util/Build1
-rw-r--r--tools/perf/arch/x86/util/auxtrace.c49
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c458
-rw-r--r--tools/perf/arch/x86/util/pmu.c3
-rw-r--r--tools/perf/scripts/python/call-graph-from-postgresql.py327
-rw-r--r--tools/perf/scripts/python/export-to-postgresql.py47
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/annotate.c1
-rw-r--r--tools/perf/util/auxtrace.c3
-rw-r--r--tools/perf/util/auxtrace.h1
-rw-r--r--tools/perf/util/evlist.c2
-rw-r--r--tools/perf/util/intel-bts.c933
-rw-r--r--tools/perf/util/intel-bts.h43
-rw-r--r--tools/perf/util/intel-pt.c2
-rw-r--r--tools/perf/util/map.c13
-rw-r--r--tools/perf/util/pmu.c4
-rw-r--r--tools/perf/util/probe-event.c7
-rw-r--r--tools/perf/util/symbol.c4
24 files changed, 2004 insertions, 81 deletions
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index fcd8a9e3d2e1..5c1867a13ef2 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1745,6 +1745,9 @@ process_cond(struct event_format *event, struct print_arg *top, char **tok)
1745 type = process_arg(event, left, &token); 1745 type = process_arg(event, left, &token);
1746 1746
1747 again: 1747 again:
1748 if (type == EVENT_ERROR)
1749 goto out_free;
1750
1748 /* Handle other operations in the arguments */ 1751 /* Handle other operations in the arguments */
1749 if (type == EVENT_OP && strcmp(token, ":") != 0) { 1752 if (type == EVENT_OP && strcmp(token, ":") != 0) {
1750 type = process_op(event, left, &token); 1753 type = process_op(event, left, &token);
@@ -2004,6 +2007,12 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
2004 goto out_warn_free; 2007 goto out_warn_free;
2005 2008
2006 type = process_arg_token(event, right, tok, type); 2009 type = process_arg_token(event, right, tok, type);
2010 if (type == EVENT_ERROR) {
2011 free_arg(right);
2012 /* token was freed in process_arg_token() via *tok */
2013 token = NULL;
2014 goto out_free;
2015 }
2007 2016
2008 if (right->type == PRINT_OP && 2017 if (right->type == PRINT_OP &&
2009 get_op_prio(arg->op.op) < get_op_prio(right->op.op)) { 2018 get_op_prio(arg->op.op) < get_op_prio(right->op.op)) {
diff --git a/tools/perf/Documentation/intel-bts.txt b/tools/perf/Documentation/intel-bts.txt
new file mode 100644
index 000000000000..8bdc93bd7fdb
--- /dev/null
+++ b/tools/perf/Documentation/intel-bts.txt
@@ -0,0 +1,86 @@
1Intel Branch Trace Store
2========================
3
4Overview
5========
6
7Intel BTS could be regarded as a predecessor to Intel PT and has some
8similarities because it can also identify every branch a program takes. A
9notable difference is that Intel BTS has no timing information and as a
10consequence the present implementation is limited to per-thread recording.
11
12While decoding Intel BTS does not require walking the object code, the object
13code is still needed to pair up calls and returns correctly, consequently much
14of the Intel PT documentation applies also to Intel BTS. Refer to the Intel PT
15documentation and consider that the PMU 'intel_bts' can usually be used in
16place of 'intel_pt' in the examples provided, with the proviso that per-thread
17recording must also be stipulated i.e. the --per-thread option for
18'perf record'.
19
20
21perf record
22===========
23
24new event
25---------
26
27The Intel BTS kernel driver creates a new PMU for Intel BTS. The perf record
28option is:
29
30 -e intel_bts//
31
32Currently Intel BTS is limited to per-thread tracing so the --per-thread option
33is also needed.
34
35
36snapshot option
37---------------
38
39The snapshot option is the same as Intel PT (refer Intel PT documentation).
40
41
42auxtrace mmap size option
43-----------------------
44
45The mmap size option is the same as Intel PT (refer Intel PT documentation).
46
47
48perf script
49===========
50
51By default, perf script will decode trace data found in the perf.data file.
52This can be further controlled by option --itrace. The --itrace option is
53the same as Intel PT (refer Intel PT documentation) except that neither
54"instructions" events nor "transactions" events (and consequently call
55chains) are supported.
56
57To disable trace decoding entirely, use the option --no-itrace.
58
59
60dump option
61-----------
62
63perf script has an option (-D) to "dump" the events i.e. display the binary
64data.
65
66When -D is used, Intel BTS packets are displayed.
67
68To disable the display of Intel BTS packets, combine the -D option with
69--no-itrace.
70
71
72perf report
73===========
74
75By default, perf report will decode trace data found in the perf.data file.
76This can be further controlled by new option --itrace exactly the same as
77perf script.
78
79
80perf inject
81===========
82
83perf inject also accepts the --itrace option in which case tracing data is
84removed and replaced with the synthesized events. e.g.
85
86 perf inject --itrace -i perf.data -o perf.data.new
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
new file mode 100644
index 000000000000..2ff946677e3b
--- /dev/null
+++ b/tools/perf/Documentation/itrace.txt
@@ -0,0 +1,22 @@
1 i synthesize instructions events
2 b synthesize branches events
3 c synthesize branches events (calls only)
4 r synthesize branches events (returns only)
5 x synthesize transactions events
6 e synthesize error events
7 d create a debug log
8 g synthesize a call chain (use with i or x)
9
10 The default is all events i.e. the same as --itrace=ibxe
11
12 In addition, the period (default 100000) for instructions events
13 can be specified in units of:
14
15 i instructions
16 t ticks
17 ms milliseconds
18 us microseconds
19 ns nanoseconds (default)
20
21 Also the call chain size (default 16, max. 1024) for instructions or
22 transactions events can be specified.
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index b876ae312699..0c721c3e37e1 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -48,28 +48,7 @@ OPTIONS
48 Decode Instruction Tracing data, replacing it with synthesized events. 48 Decode Instruction Tracing data, replacing it with synthesized events.
49 Options are: 49 Options are:
50 50
51 i synthesize instructions events 51include::itrace.txt[]
52 b synthesize branches events
53 c synthesize branches events (calls only)
54 r synthesize branches events (returns only)
55 x synthesize transactions events
56 e synthesize error events
57 d create a debug log
58 g synthesize a call chain (use with i or x)
59
60 The default is all events i.e. the same as --itrace=ibxe
61
62 In addition, the period (default 100000) for instructions events
63 can be specified in units of:
64
65 i instructions
66 t ticks
67 ms milliseconds
68 us microseconds
69 ns nanoseconds (default)
70
71 Also the call chain size (default 16, max. 1024) for instructions or
72 transactions events can be specified.
73 52
74SEE ALSO 53SEE ALSO
75-------- 54--------
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index a18ba757a0ed..9c7981bfddad 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -331,28 +331,7 @@ OPTIONS
331--itrace:: 331--itrace::
332 Options for decoding instruction tracing data. The options are: 332 Options for decoding instruction tracing data. The options are:
333 333
334 i synthesize instructions events 334include::itrace.txt[]
335 b synthesize branches events
336 c synthesize branches events (calls only)
337 r synthesize branches events (returns only)
338 x synthesize transactions events
339 e synthesize error events
340 d create a debug log
341 g synthesize a call chain (use with i or x)
342
343 The default is all events i.e. the same as --itrace=ibxe
344
345 In addition, the period (default 100000) for instructions events
346 can be specified in units of:
347
348 i instructions
349 t ticks
350 ms milliseconds
351 us microseconds
352 ns nanoseconds (default)
353
354 Also the call chain size (default 16, max. 1024) for instructions or
355 transactions events can be specified.
356 335
357 To disable decoding entirely, use --no-itrace. 336 To disable decoding entirely, use --no-itrace.
358 337
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 8e9be1f9c1dd..c0d24791a7f3 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -235,28 +235,7 @@ OPTIONS
235--itrace:: 235--itrace::
236 Options for decoding instruction tracing data. The options are: 236 Options for decoding instruction tracing data. The options are:
237 237
238 i synthesize instructions events 238include::itrace.txt[]
239 b synthesize branches events
240 c synthesize branches events (calls only)
241 r synthesize branches events (returns only)
242 x synthesize transactions events
243 e synthesize error events
244 d create a debug log
245 g synthesize a call chain (use with i or x)
246
247 The default is all events i.e. the same as --itrace=ibxe
248
249 In addition, the period (default 100000) for instructions events
250 can be specified in units of:
251
252 i instructions
253 t ticks
254 ms milliseconds
255 us microseconds
256 ns nanoseconds (default)
257
258 Also the call chain size (default 16, max. 1024) for instructions or
259 transactions events can be specified.
260 239
261 To disable decoding entirely, use --no-itrace. 240 To disable decoding entirely, use --no-itrace.
262 241
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index a8be9f9d0462..2c55e1b336c5 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -10,3 +10,4 @@ libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
10 10
11libperf-$(CONFIG_AUXTRACE) += auxtrace.o 11libperf-$(CONFIG_AUXTRACE) += auxtrace.o
12libperf-$(CONFIG_AUXTRACE) += intel-pt.o 12libperf-$(CONFIG_AUXTRACE) += intel-pt.o
13libperf-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index e7654b506312..7a7805583e3f 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -13,11 +13,56 @@
13 * 13 *
14 */ 14 */
15 15
16#include <stdbool.h>
17
16#include "../../util/header.h" 18#include "../../util/header.h"
19#include "../../util/debug.h"
20#include "../../util/pmu.h"
17#include "../../util/auxtrace.h" 21#include "../../util/auxtrace.h"
18#include "../../util/intel-pt.h" 22#include "../../util/intel-pt.h"
23#include "../../util/intel-bts.h"
24#include "../../util/evlist.h"
25
26static
27struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
28 int *err)
29{
30 struct perf_pmu *intel_pt_pmu;
31 struct perf_pmu *intel_bts_pmu;
32 struct perf_evsel *evsel;
33 bool found_pt = false;
34 bool found_bts = false;
35
36 intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
37 intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
38
39 if (evlist) {
40 evlist__for_each(evlist, evsel) {
41 if (intel_pt_pmu &&
42 evsel->attr.type == intel_pt_pmu->type)
43 found_pt = true;
44 if (intel_bts_pmu &&
45 evsel->attr.type == intel_bts_pmu->type)
46 found_bts = true;
47 }
48 }
49
50 if (found_pt && found_bts) {
51 pr_err("intel_pt and intel_bts may not be used together\n");
52 *err = -EINVAL;
53 return NULL;
54 }
55
56 if (found_pt)
57 return intel_pt_recording_init(err);
58
59 if (found_bts)
60 return intel_bts_recording_init(err);
19 61
20struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, 62 return NULL;
63}
64
65struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
21 int *err) 66 int *err)
22{ 67{
23 char buffer[64]; 68 char buffer[64];
@@ -32,7 +77,7 @@ struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe
32 } 77 }
33 78
34 if (!strncmp(buffer, "GenuineIntel,", 13)) 79 if (!strncmp(buffer, "GenuineIntel,", 13))
35 return intel_pt_recording_init(err); 80 return auxtrace_record__init_intel(evlist, err);
36 81
37 return NULL; 82 return NULL;
38} 83}
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
new file mode 100644
index 000000000000..9b94ce520917
--- /dev/null
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -0,0 +1,458 @@
1/*
2 * intel-bts.c: Intel Processor Trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <linux/kernel.h>
17#include <linux/types.h>
18#include <linux/bitops.h>
19#include <linux/log2.h>
20
21#include "../../util/cpumap.h"
22#include "../../util/evsel.h"
23#include "../../util/evlist.h"
24#include "../../util/session.h"
25#include "../../util/util.h"
26#include "../../util/pmu.h"
27#include "../../util/debug.h"
28#include "../../util/tsc.h"
29#include "../../util/auxtrace.h"
30#include "../../util/intel-bts.h"
31
32#define KiB(x) ((x) * 1024)
33#define MiB(x) ((x) * 1024 * 1024)
34#define KiB_MASK(x) (KiB(x) - 1)
35#define MiB_MASK(x) (MiB(x) - 1)
36
37#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4)
38
39#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60)
40
41struct intel_bts_snapshot_ref {
42 void *ref_buf;
43 size_t ref_offset;
44 bool wrapped;
45};
46
47struct intel_bts_recording {
48 struct auxtrace_record itr;
49 struct perf_pmu *intel_bts_pmu;
50 struct perf_evlist *evlist;
51 bool snapshot_mode;
52 size_t snapshot_size;
53 int snapshot_ref_cnt;
54 struct intel_bts_snapshot_ref *snapshot_refs;
55};
56
57struct branch {
58 u64 from;
59 u64 to;
60 u64 misc;
61};
62
63static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused)
64{
65 return INTEL_BTS_AUXTRACE_PRIV_SIZE;
66}
67
68static int intel_bts_info_fill(struct auxtrace_record *itr,
69 struct perf_session *session,
70 struct auxtrace_info_event *auxtrace_info,
71 size_t priv_size)
72{
73 struct intel_bts_recording *btsr =
74 container_of(itr, struct intel_bts_recording, itr);
75 struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
76 struct perf_event_mmap_page *pc;
77 struct perf_tsc_conversion tc = { .time_mult = 0, };
78 bool cap_user_time_zero = false;
79 int err;
80
81 if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
82 return -EINVAL;
83
84 if (!session->evlist->nr_mmaps)
85 return -EINVAL;
86
87 pc = session->evlist->mmap[0].base;
88 if (pc) {
89 err = perf_read_tsc_conversion(pc, &tc);
90 if (err) {
91 if (err != -EOPNOTSUPP)
92 return err;
93 } else {
94 cap_user_time_zero = tc.time_mult != 0;
95 }
96 if (!cap_user_time_zero)
97 ui__warning("Intel BTS: TSC not available\n");
98 }
99
100 auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS;
101 auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type;
102 auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift;
103 auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult;
104 auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero;
105 auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero;
106 auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode;
107
108 return 0;
109}
110
111static int intel_bts_recording_options(struct auxtrace_record *itr,
112 struct perf_evlist *evlist,
113 struct record_opts *opts)
114{
115 struct intel_bts_recording *btsr =
116 container_of(itr, struct intel_bts_recording, itr);
117 struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
118 struct perf_evsel *evsel, *intel_bts_evsel = NULL;
119 const struct cpu_map *cpus = evlist->cpus;
120 bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
121
122 btsr->evlist = evlist;
123 btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
124
125 evlist__for_each(evlist, evsel) {
126 if (evsel->attr.type == intel_bts_pmu->type) {
127 if (intel_bts_evsel) {
128 pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n");
129 return -EINVAL;
130 }
131 evsel->attr.freq = 0;
132 evsel->attr.sample_period = 1;
133 intel_bts_evsel = evsel;
134 opts->full_auxtrace = true;
135 }
136 }
137
138 if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
139 pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n");
140 return -EINVAL;
141 }
142
143 if (!opts->full_auxtrace)
144 return 0;
145
146 if (opts->full_auxtrace && !cpu_map__empty(cpus)) {
147 pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
148 return -EINVAL;
149 }
150
151 /* Set default sizes for snapshot mode */
152 if (opts->auxtrace_snapshot_mode) {
153 if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
154 if (privileged) {
155 opts->auxtrace_mmap_pages = MiB(4) / page_size;
156 } else {
157 opts->auxtrace_mmap_pages = KiB(128) / page_size;
158 if (opts->mmap_pages == UINT_MAX)
159 opts->mmap_pages = KiB(256) / page_size;
160 }
161 } else if (!opts->auxtrace_mmap_pages && !privileged &&
162 opts->mmap_pages == UINT_MAX) {
163 opts->mmap_pages = KiB(256) / page_size;
164 }
165 if (!opts->auxtrace_snapshot_size)
166 opts->auxtrace_snapshot_size =
167 opts->auxtrace_mmap_pages * (size_t)page_size;
168 if (!opts->auxtrace_mmap_pages) {
169 size_t sz = opts->auxtrace_snapshot_size;
170
171 sz = round_up(sz, page_size) / page_size;
172 opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
173 }
174 if (opts->auxtrace_snapshot_size >
175 opts->auxtrace_mmap_pages * (size_t)page_size) {
176 pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
177 opts->auxtrace_snapshot_size,
178 opts->auxtrace_mmap_pages * (size_t)page_size);
179 return -EINVAL;
180 }
181 if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
182 pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
183 return -EINVAL;
184 }
185 pr_debug2("Intel BTS snapshot size: %zu\n",
186 opts->auxtrace_snapshot_size);
187 }
188
189 /* Set default sizes for full trace mode */
190 if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
191 if (privileged) {
192 opts->auxtrace_mmap_pages = MiB(4) / page_size;
193 } else {
194 opts->auxtrace_mmap_pages = KiB(128) / page_size;
195 if (opts->mmap_pages == UINT_MAX)
196 opts->mmap_pages = KiB(256) / page_size;
197 }
198 }
199
200 /* Validate auxtrace_mmap_pages */
201 if (opts->auxtrace_mmap_pages) {
202 size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
203 size_t min_sz;
204
205 if (opts->auxtrace_snapshot_mode)
206 min_sz = KiB(4);
207 else
208 min_sz = KiB(8);
209
210 if (sz < min_sz || !is_power_of_2(sz)) {
211 pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n",
212 min_sz / 1024);
213 return -EINVAL;
214 }
215 }
216
217 if (intel_bts_evsel) {
218 /*
219 * To obtain the auxtrace buffer file descriptor, the auxtrace event
220 * must come first.
221 */
222 perf_evlist__to_front(evlist, intel_bts_evsel);
223 /*
224 * In the case of per-cpu mmaps, we need the CPU on the
225 * AUX event.
226 */
227 if (!cpu_map__empty(cpus))
228 perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
229 }
230
231 /* Add dummy event to keep tracking */
232 if (opts->full_auxtrace) {
233 struct perf_evsel *tracking_evsel;
234 int err;
235
236 err = parse_events(evlist, "dummy:u", NULL);
237 if (err)
238 return err;
239
240 tracking_evsel = perf_evlist__last(evlist);
241
242 perf_evlist__set_tracking_event(evlist, tracking_evsel);
243
244 tracking_evsel->attr.freq = 0;
245 tracking_evsel->attr.sample_period = 1;
246 }
247
248 return 0;
249}
250
251static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr,
252 struct record_opts *opts,
253 const char *str)
254{
255 struct intel_bts_recording *btsr =
256 container_of(itr, struct intel_bts_recording, itr);
257 unsigned long long snapshot_size = 0;
258 char *endptr;
259
260 if (str) {
261 snapshot_size = strtoull(str, &endptr, 0);
262 if (*endptr || snapshot_size > SIZE_MAX)
263 return -1;
264 }
265
266 opts->auxtrace_snapshot_mode = true;
267 opts->auxtrace_snapshot_size = snapshot_size;
268
269 btsr->snapshot_size = snapshot_size;
270
271 return 0;
272}
273
274static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused)
275{
276 return rdtsc();
277}
278
279static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr,
280 int idx)
281{
282 const size_t sz = sizeof(struct intel_bts_snapshot_ref);
283 int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2;
284 struct intel_bts_snapshot_ref *refs;
285
286 if (!new_cnt)
287 new_cnt = 16;
288
289 while (new_cnt <= idx)
290 new_cnt *= 2;
291
292 refs = calloc(new_cnt, sz);
293 if (!refs)
294 return -ENOMEM;
295
296 memcpy(refs, btsr->snapshot_refs, cnt * sz);
297
298 btsr->snapshot_refs = refs;
299 btsr->snapshot_ref_cnt = new_cnt;
300
301 return 0;
302}
303
304static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr)
305{
306 int i;
307
308 for (i = 0; i < btsr->snapshot_ref_cnt; i++)
309 zfree(&btsr->snapshot_refs[i].ref_buf);
310 zfree(&btsr->snapshot_refs);
311}
312
313static void intel_bts_recording_free(struct auxtrace_record *itr)
314{
315 struct intel_bts_recording *btsr =
316 container_of(itr, struct intel_bts_recording, itr);
317
318 intel_bts_free_snapshot_refs(btsr);
319 free(btsr);
320}
321
322static int intel_bts_snapshot_start(struct auxtrace_record *itr)
323{
324 struct intel_bts_recording *btsr =
325 container_of(itr, struct intel_bts_recording, itr);
326 struct perf_evsel *evsel;
327
328 evlist__for_each(btsr->evlist, evsel) {
329 if (evsel->attr.type == btsr->intel_bts_pmu->type)
330 return perf_evlist__disable_event(btsr->evlist, evsel);
331 }
332 return -EINVAL;
333}
334
335static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
336{
337 struct intel_bts_recording *btsr =
338 container_of(itr, struct intel_bts_recording, itr);
339 struct perf_evsel *evsel;
340
341 evlist__for_each(btsr->evlist, evsel) {
342 if (evsel->attr.type == btsr->intel_bts_pmu->type)
343 return perf_evlist__enable_event(btsr->evlist, evsel);
344 }
345 return -EINVAL;
346}
347
348static bool intel_bts_first_wrap(u64 *data, size_t buf_size)
349{
350 int i, a, b;
351
352 b = buf_size >> 3;
353 a = b - 512;
354 if (a < 0)
355 a = 0;
356
357 for (i = a; i < b; i++) {
358 if (data[i])
359 return true;
360 }
361
362 return false;
363}
364
365static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx,
366 struct auxtrace_mmap *mm, unsigned char *data,
367 u64 *head, u64 *old)
368{
369 struct intel_bts_recording *btsr =
370 container_of(itr, struct intel_bts_recording, itr);
371 bool wrapped;
372 int err;
373
374 pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
375 __func__, idx, (size_t)*old, (size_t)*head);
376
377 if (idx >= btsr->snapshot_ref_cnt) {
378 err = intel_bts_alloc_snapshot_refs(btsr, idx);
379 if (err)
380 goto out_err;
381 }
382
383 wrapped = btsr->snapshot_refs[idx].wrapped;
384 if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) {
385 btsr->snapshot_refs[idx].wrapped = true;
386 wrapped = true;
387 }
388
389 /*
390 * In full trace mode 'head' continually increases. However in snapshot
391 * mode 'head' is an offset within the buffer. Here 'old' and 'head'
392 * are adjusted to match the full trace case which expects that 'old' is
393 * always less than 'head'.
394 */
395 if (wrapped) {
396 *old = *head;
397 *head += mm->len;
398 } else {
399 if (mm->mask)
400 *old &= mm->mask;
401 else
402 *old %= mm->len;
403 if (*old > *head)
404 *head += mm->len;
405 }
406
407 pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
408 __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
409
410 return 0;
411
412out_err:
413 pr_err("%s: failed, error %d\n", __func__, err);
414 return err;
415}
416
417static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
418{
419 struct intel_bts_recording *btsr =
420 container_of(itr, struct intel_bts_recording, itr);
421 struct perf_evsel *evsel;
422
423 evlist__for_each(btsr->evlist, evsel) {
424 if (evsel->attr.type == btsr->intel_bts_pmu->type)
425 return perf_evlist__enable_event_idx(btsr->evlist,
426 evsel, idx);
427 }
428 return -EINVAL;
429}
430
431struct auxtrace_record *intel_bts_recording_init(int *err)
432{
433 struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
434 struct intel_bts_recording *btsr;
435
436 if (!intel_bts_pmu)
437 return NULL;
438
439 btsr = zalloc(sizeof(struct intel_bts_recording));
440 if (!btsr) {
441 *err = -ENOMEM;
442 return NULL;
443 }
444
445 btsr->intel_bts_pmu = intel_bts_pmu;
446 btsr->itr.recording_options = intel_bts_recording_options;
447 btsr->itr.info_priv_size = intel_bts_info_priv_size;
448 btsr->itr.info_fill = intel_bts_info_fill;
449 btsr->itr.free = intel_bts_recording_free;
450 btsr->itr.snapshot_start = intel_bts_snapshot_start;
451 btsr->itr.snapshot_finish = intel_bts_snapshot_finish;
452 btsr->itr.find_snapshot = intel_bts_find_snapshot;
453 btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
454 btsr->itr.reference = intel_bts_reference;
455 btsr->itr.read_finish = intel_bts_read_finish;
456 btsr->itr.alignment = sizeof(struct branch);
457 return &btsr->itr;
458}
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index fd11cc3ce780..79fe07158d00 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -3,6 +3,7 @@
3#include <linux/perf_event.h> 3#include <linux/perf_event.h>
4 4
5#include "../../util/intel-pt.h" 5#include "../../util/intel-pt.h"
6#include "../../util/intel-bts.h"
6#include "../../util/pmu.h" 7#include "../../util/pmu.h"
7 8
8struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) 9struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
@@ -10,6 +11,8 @@ struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __mayb
10#ifdef HAVE_AUXTRACE_SUPPORT 11#ifdef HAVE_AUXTRACE_SUPPORT
11 if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) 12 if (!strcmp(pmu->name, INTEL_PT_PMU_NAME))
12 return intel_pt_pmu_default_config(pmu); 13 return intel_pt_pmu_default_config(pmu);
14 if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME))
15 pmu->selectable = true;
13#endif 16#endif
14 return NULL; 17 return NULL;
15} 18}
diff --git a/tools/perf/scripts/python/call-graph-from-postgresql.py b/tools/perf/scripts/python/call-graph-from-postgresql.py
new file mode 100644
index 000000000000..e78fdc2a5a9d
--- /dev/null
+++ b/tools/perf/scripts/python/call-graph-from-postgresql.py
@@ -0,0 +1,327 @@
1#!/usr/bin/python2
2# call-graph-from-postgresql.py: create call-graph from postgresql database
3# Copyright (c) 2014, Intel Corporation.
4#
5# This program is free software; you can redistribute it and/or modify it
6# under the terms and conditions of the GNU General Public License,
7# version 2, as published by the Free Software Foundation.
8#
9# This program is distributed in the hope it will be useful, but WITHOUT
10# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12# more details.
13
14# To use this script you will need to have exported data using the
15# export-to-postgresql.py script. Refer to that script for details.
16#
17# Following on from the example in the export-to-postgresql.py script, a
18# call-graph can be displayed for the pt_example database like this:
19#
20# python tools/perf/scripts/python/call-graph-from-postgresql.py pt_example
21#
22# Note this script supports connecting to remote databases by setting hostname,
23# port, username, password, and dbname e.g.
24#
25# python tools/perf/scripts/python/call-graph-from-postgresql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
26#
27# The result is a GUI window with a tree representing a context-sensitive
28# call-graph. Expanding a couple of levels of the tree and adjusting column
29# widths to suit will display something like:
30#
31# Call Graph: pt_example
32# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%)
33# v- ls
34# v- 2638:2638
35# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0
36# |- unknown unknown 1 13198 0.1 1 0.0
37# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3
38# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3
39# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4
40# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1
41# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0
42# >- __libc_csu_init ls 1 10354 0.1 10 0.0
43# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0
44# v- main ls 1 8182043 99.6 180254 99.9
45#
46# Points to note:
47# The top level is a command name (comm)
48# The next level is a thread (pid:tid)
49# Subsequent levels are functions
50# 'Count' is the number of calls
51# 'Time' is the elapsed time until the function returns
52# Percentages are relative to the level above
53# 'Branch Count' is the total number of branches for that function and all
54# functions that it calls
55
56import sys
57from PySide.QtCore import *
58from PySide.QtGui import *
59from PySide.QtSql import *
60from decimal import *
61
class TreeItem():
	"""One node of the call-graph tree.

	Levels: root -> comm (command name) -> thread (pid:tid) -> call
	paths (functions).  Call-path children are populated lazily by
	selectCalls() the first time childCount() is called.
	"""

	def __init__(self, db, row, parent_item):
		self.db = db
		self.row = row
		self.parent_item = parent_item
		self.query_done = False
		self.child_count = 0
		self.child_items = []
		# Columns: Call Path, Object, Count, Time, Time%, Branch Count, Branch Count%
		self.data = ["", "", "", "", "", "", ""]
		self.comm_id = 0
		self.thread_id = 0
		self.call_path_id = 1
		self.branch_count = 0
		self.time = 0
		if not parent_item:
			self.setUpRoot()

	def setUpRoot(self):
		"""Populate the root's children: one child per comm."""
		self.query_done = True
		query = QSqlQuery(self.db)
		ret = query.exec_('SELECT id, comm FROM comms')
		if not ret:
			raise Exception("Query failed: " + query.lastError().text())
		while query.next():
			if not query.value(0):
				continue
			child_item = TreeItem(self.db, self.child_count, self)
			self.child_items.append(child_item)
			self.child_count += 1
			child_item.setUpLevel1(query.value(0), query.value(1))

	def setUpLevel1(self, comm_id, comm):
		"""Populate a comm node: one child per thread of that comm."""
		self.query_done = True
		self.comm_id = comm_id
		self.data[0] = comm
		self.child_items = []
		self.child_count = 0
		query = QSqlQuery(self.db)
		ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
		if not ret:
			raise Exception("Query failed: " + query.lastError().text())
		while query.next():
			child_item = TreeItem(self.db, self.child_count, self)
			self.child_items.append(child_item)
			self.child_count += 1
			child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))

	def setUpLevel2(self, comm_id, thread_id, pid, tid):
		"""Initialise a thread node; its calls are selected lazily."""
		self.comm_id = comm_id
		self.thread_id = thread_id
		self.data[0] = str(pid) + ":" + str(tid)

	def getChildItem(self, row):
		return self.child_items[row]

	def getParentItem(self):
		return self.parent_item

	def getRow(self):
		return self.row

	def timePercent(self, b):
		"""Return b as a percentage of this node's time, to 1 decimal."""
		if not self.time:
			return "0.0"
		x = (b * Decimal(100)) / self.time
		return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))

	def branchPercent(self, b):
		"""Return b as a percentage of this node's branch count."""
		if not self.branch_count:
			return "0.0"
		x = (b * Decimal(100)) / self.branch_count
		return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))

	def addChild(self, call_path_id, name, dso, count, time, branch_count):
		"""Append a call-path child with its aggregated statistics."""
		child_item = TreeItem(self.db, self.child_count, self)
		child_item.comm_id = self.comm_id
		child_item.thread_id = self.thread_id
		child_item.call_path_id = call_path_id
		child_item.branch_count = branch_count
		child_item.time = time
		child_item.data[0] = name
		if dso == "[kernel.kallsyms]":
			dso = "[kernel]"
		child_item.data[1] = dso
		child_item.data[2] = str(count)
		child_item.data[3] = str(time)
		child_item.data[4] = self.timePercent(time)
		child_item.data[5] = str(branch_count)
		child_item.data[6] = self.branchPercent(branch_count)
		self.child_items.append(child_item)
		self.child_count += 1

	def selectCalls(self):
		"""Aggregate the calls below this node, grouped by call path."""
		self.query_done = True
		query = QSqlQuery(self.db)
		# Fix: a leading space is required before 'ORDER BY', otherwise
		# str(self.thread_id) is concatenated directly into the keyword
		# ("... thread_id = 5ORDER BY ..."), which strict SQL lexers
		# reject.
		ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
			'( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
			'( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
			'( SELECT ip FROM call_paths where id = call_path_id ) '
			'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
			' ORDER BY call_path_id')
		if not ret:
			raise Exception("Query failed: " + query.lastError().text())
		last_call_path_id = 0
		name = ""
		dso = ""
		count = 0
		branch_count = 0
		total_branch_count = 0
		time = 0
		total_time = 0
		# Rows are ordered by call_path_id, so equal ids are adjacent
		# and can be merged into a single child on the fly.
		while query.next():
			if query.value(1) == last_call_path_id:
				count += 1
				branch_count += query.value(2)
				time += query.value(4) - query.value(3)
			else:
				if count:
					self.addChild(last_call_path_id, name, dso, count, time, branch_count)
				last_call_path_id = query.value(1)
				name = query.value(5)
				dso = query.value(6)
				count = 1
				total_branch_count += branch_count
				total_time += time
				branch_count = query.value(2)
				time = query.value(4) - query.value(3)
		if count:
			self.addChild(last_call_path_id, name, dso, count, time, branch_count)
			total_branch_count += branch_count
			total_time += time
		# Top level does not have time or branch count, so fix that here
		if total_branch_count > self.branch_count:
			self.branch_count = total_branch_count
		if self.branch_count:
			for child_item in self.child_items:
				child_item.data[6] = self.branchPercent(child_item.branch_count)
		if total_time > self.time:
			self.time = total_time
		if self.time:
			for child_item in self.child_items:
				child_item.data[4] = self.timePercent(child_item.time)

	def childCount(self):
		"""Number of children; triggers lazy population on first call."""
		if not self.query_done:
			self.selectCalls()
		return self.child_count

	def columnCount(self):
		return 7

	def columnHeader(self, column):
		headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
		return headers[column]

	def getData(self, column):
		return self.data[column]
220
class TreeModel(QAbstractItemModel):
	"""Qt item model exposing the TreeItem call-graph to a QTreeView."""

	def __init__(self, db, parent=None):
		super(TreeModel, self).__init__(parent)
		self.db = db
		self.root = TreeItem(db, 0, None)

	def columnCount(self, parent):
		return self.root.columnCount()

	def rowCount(self, parent):
		item = parent.internalPointer() if parent.isValid() else self.root
		return item.childCount()

	def headerData(self, section, orientation, role):
		# Right-align the numeric columns (everything after Object)
		if role == Qt.TextAlignmentRole:
			if section > 1:
				return Qt.AlignRight
		if role != Qt.DisplayRole or orientation != Qt.Horizontal:
			return None
		return self.root.columnHeader(section)

	def parent(self, child):
		item = child.internalPointer()
		if item is self.root:
			return QModelIndex()
		parent_item = item.getParentItem()
		return self.createIndex(parent_item.getRow(), 0, parent_item)

	def index(self, row, column, parent):
		parent_item = parent.internalPointer() if parent.isValid() else self.root
		return self.createIndex(row, column, parent_item.getChildItem(row))

	def data(self, index, role):
		# Right-align the numeric columns, mirroring headerData()
		if role == Qt.TextAlignmentRole and index.column() > 1:
			return Qt.AlignRight
		if role != Qt.DisplayRole:
			return None
		return index.internalPointer().getData(index.column())
271
class MainWindow(QMainWindow):
	"""Top-level window showing the call-graph tree for one database."""

	def __init__(self, db, dbname, parent=None):
		super(MainWindow, self).__init__(parent)

		self.setObjectName("MainWindow")
		self.setWindowTitle("Call Graph: " + dbname)
		self.move(100, 100)
		self.resize(800, 600)
		self.setWindowIcon(self.style().standardIcon(QStyle.SP_MessageBoxInformation))

		# The tree view is the only widget; it fills the window
		self.model = TreeModel(db)
		self.view = QTreeView()
		self.view.setModel(self.model)
		self.setCentralWidget(self.view)
291
if __name__ == '__main__':
	# Single argument: either a plain database name or a space-separated
	# connection string (hostname=... port=... username=... password=...
	# dbname=...)
	if (len(sys.argv) < 2):
		print >> sys.stderr, "Usage is: call-graph-from-postgresql.py <database name>"
		raise Exception("Too few arguments")

	dbname = sys.argv[1]

	db = QSqlDatabase.addDatabase('QPSQL')

	# Parse optional key=value connection parameters; any bare word is
	# taken as the database name
	opts = dbname.split()
	for opt in opts:
		if '=' in opt:
			opt = opt.split('=')
			if opt[0] == 'hostname':
				db.setHostName(opt[1])
			elif opt[0] == 'port':
				db.setPort(int(opt[1]))
			elif opt[0] == 'username':
				db.setUserName(opt[1])
			elif opt[0] == 'password':
				db.setPassword(opt[1])
			elif opt[0] == 'dbname':
				dbname = opt[1]
		else:
			dbname = opt

	db.setDatabaseName(dbname)
	if not db.open():
		raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())

	# Run the Qt event loop until the window is closed, then exit with
	# the loop's return code
	app = QApplication(sys.argv)
	window = MainWindow(db, dbname)
	window.show()
	err = app.exec_()
	db.close()
	sys.exit(err)
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 4cdafd880074..84a32037a80f 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -15,6 +15,53 @@ import sys
15import struct 15import struct
16import datetime 16import datetime
17 17
18# To use this script you will need to have installed package python-pyside which
19# provides LGPL-licensed Python bindings for Qt. You will also need the package
20# libqt4-sql-psql for Qt postgresql support.
21#
22# The script assumes postgresql is running on the local machine and that the
23# user has postgresql permissions to create databases. Examples of installing
24# postgresql and adding such a user are:
25#
26# fedora:
27#
28# $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql
29# $ sudo su - postgres -c initdb
30# $ sudo service postgresql start
31# $ sudo su - postgres
32# $ createuser <your user id here>
33# Shall the new role be a superuser? (y/n) y
34#
35# ubuntu:
36#
37# $ sudo apt-get install postgresql
38# $ sudo su - postgres
39# $ createuser <your user id here>
40# Shall the new role be a superuser? (y/n) y
41#
42# An example of using this script with Intel PT:
43#
44# $ perf record -e intel_pt//u ls
45# $ perf script -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py pt_example branches calls
46# 2015-05-29 12:49:23.464364 Creating database...
47# 2015-05-29 12:49:26.281717 Writing to intermediate files...
48# 2015-05-29 12:49:27.190383 Copying to database...
49# 2015-05-29 12:49:28.140451 Removing intermediate files...
50# 2015-05-29 12:49:28.147451 Adding primary keys
51# 2015-05-29 12:49:28.655683 Adding foreign keys
52# 2015-05-29 12:49:29.365350 Done
53#
54# To browse the database, psql can be used e.g.
55#
56# $ psql pt_example
57# pt_example=# select * from samples_view where id < 100;
58# pt_example=# \d+
59# pt_example=# \d+ samples_view
60# pt_example=# \q
61#
62# An example of using the database is provided by the script
63# call-graph-from-postgresql.py. Refer to that script for details.
64
18from PySide.QtSql import * 65from PySide.QtSql import *
19 66
20# Need to access PostgreSQL C library directly to use COPY FROM STDIN 67# Need to access PostgreSQL C library directly to use COPY FROM STDIN
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index c20473d1369e..e912856cc4e5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -80,6 +80,7 @@ libperf-y += thread-stack.o
80libperf-$(CONFIG_AUXTRACE) += auxtrace.o 80libperf-$(CONFIG_AUXTRACE) += auxtrace.o
81libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ 81libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
82libperf-$(CONFIG_AUXTRACE) += intel-pt.o 82libperf-$(CONFIG_AUXTRACE) += intel-pt.o
83libperf-$(CONFIG_AUXTRACE) += intel-bts.o
83libperf-y += parse-branch-options.o 84libperf-y += parse-branch-options.o
84 85
85libperf-$(CONFIG_LIBELF) += symbol-elf.o 86libperf-$(CONFIG_LIBELF) += symbol-elf.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 8a18347709e1..d1eece70b84d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1126,6 +1126,7 @@ fallback:
1126 dso->annotate_warned = 1; 1126 dso->annotate_warned = 1;
1127 pr_err("Can't annotate %s:\n\n" 1127 pr_err("Can't annotate %s:\n\n"
1128 "No vmlinux file%s\nwas found in the path.\n\n" 1128 "No vmlinux file%s\nwas found in the path.\n\n"
1129 "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
1129 "Please use:\n\n" 1130 "Please use:\n\n"
1130 " perf buildid-cache -vu vmlinux\n\n" 1131 " perf buildid-cache -vu vmlinux\n\n"
1131 "or:\n\n" 1132 "or:\n\n"
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 0f0b7e11e2d9..a980e7c50ee0 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -48,6 +48,7 @@
48#include "parse-options.h" 48#include "parse-options.h"
49 49
50#include "intel-pt.h" 50#include "intel-pt.h"
51#include "intel-bts.h"
51 52
52int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, 53int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
53 struct auxtrace_mmap_params *mp, 54 struct auxtrace_mmap_params *mp,
@@ -888,6 +889,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
888 switch (type) { 889 switch (type) {
889 case PERF_AUXTRACE_INTEL_PT: 890 case PERF_AUXTRACE_INTEL_PT:
890 return intel_pt_process_auxtrace_info(event, session); 891 return intel_pt_process_auxtrace_info(event, session);
892 case PERF_AUXTRACE_INTEL_BTS:
893 return intel_bts_process_auxtrace_info(event, session);
891 case PERF_AUXTRACE_UNKNOWN: 894 case PERF_AUXTRACE_UNKNOWN:
892 default: 895 default:
893 return -EINVAL; 896 return -EINVAL;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 7d12f33a3a06..bf72b77a588a 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -40,6 +40,7 @@ struct events_stats;
40enum auxtrace_type { 40enum auxtrace_type {
41 PERF_AUXTRACE_UNKNOWN, 41 PERF_AUXTRACE_UNKNOWN,
42 PERF_AUXTRACE_INTEL_PT, 42 PERF_AUXTRACE_INTEL_PT,
43 PERF_AUXTRACE_INTEL_BTS,
43}; 44};
44 45
45enum itrace_period_type { 46enum itrace_period_type {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 373f65b02545..e9a5d432902c 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -573,7 +573,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
573{ 573{
574 struct perf_sample_id *sid; 574 struct perf_sample_id *sid;
575 575
576 if (evlist->nr_entries == 1) 576 if (evlist->nr_entries == 1 || !id)
577 return perf_evlist__first(evlist); 577 return perf_evlist__first(evlist);
578 578
579 sid = perf_evlist__id2sid(evlist, id); 579 sid = perf_evlist__id2sid(evlist, id);
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
new file mode 100644
index 000000000000..ea768625ab5b
--- /dev/null
+++ b/tools/perf/util/intel-bts.c
@@ -0,0 +1,933 @@
1/*
2 * intel-bts.c: Intel Processor Trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <endian.h>
17#include <byteswap.h>
18#include <linux/kernel.h>
19#include <linux/types.h>
20#include <linux/bitops.h>
21#include <linux/log2.h>
22
23#include "cpumap.h"
24#include "color.h"
25#include "evsel.h"
26#include "evlist.h"
27#include "machine.h"
28#include "session.h"
29#include "util.h"
30#include "thread.h"
31#include "thread-stack.h"
32#include "debug.h"
33#include "tsc.h"
34#include "auxtrace.h"
35#include "intel-pt-decoder/intel-pt-insn-decoder.h"
36#include "intel-bts.h"
37
38#define MAX_TIMESTAMP (~0ULL)
39
40#define INTEL_BTS_ERR_NOINSN 5
41#define INTEL_BTS_ERR_LOST 9
42
43#if __BYTE_ORDER == __BIG_ENDIAN
44#define le64_to_cpu bswap_64
45#else
46#define le64_to_cpu
47#endif
48
/*
 * Per-session Intel BTS decoding state.
 */
struct intel_bts {
	struct auxtrace auxtrace;		/* base: container_of() recovers struct intel_bts in callbacks */
	struct auxtrace_queues queues;		/* per-cpu/per-thread queues of trace buffers */
	struct auxtrace_heap heap;		/* orders queues by next-buffer timestamp */
	u32 auxtrace_type;
	struct perf_session *session;
	struct machine *machine;
	bool sampling_mode;			/* sampling data is not processed via the heap */
	bool snapshot_mode;			/* snapshots may need overlap trimming (intel_bts_do_fix_overlap) */
	bool data_queued;
	u32 pmu_type;
	struct perf_tsc_conversion tc;		/* perf time -> TSC for heap ordering */
	bool cap_user_time_zero;
	struct itrace_synth_opts synth_opts;
	bool sample_branches;			/* synthesize branch samples from BTS records */
	u32 branches_filter;			/* PERF_IP_FLAG_* mask to keep; 0 = keep all */
	u64 branches_sample_type;
	u64 branches_id;			/* id/stream_id for synthesized branch samples */
	size_t branches_event_size;
	bool synth_needs_swap;			/* byte-swap synthesized samples when injecting */
};
70
/*
 * Per-queue (per-cpu or per-thread) decode state.
 */
struct intel_bts_queue {
	struct intel_bts *bts;			/* back pointer to session state */
	unsigned int queue_nr;			/* index into bts->queues.queue_array */
	struct auxtrace_buffer *buffer;		/* buffer currently being processed */
	bool on_heap;				/* queue is present in bts->heap */
	bool done;				/* queue exhausted (non-sampling mode) */
	pid_t pid;				/* -1 until resolved from tid */
	pid_t tid;
	int cpu;				/* -1 if unknown */
	u64 time;
	struct intel_pt_insn intel_pt_insn;	/* insn decoded at the last branch source */
	u32 sample_flags;			/* PERF_IP_FLAG_* for the last branch */
};
84
/*
 * Layout of one BTS record in the AUX data: little-endian branch source
 * and destination addresses plus a misc word (bit 0x10 set means the
 * branch was predicted - see intel_bts_dump()).
 */
struct branch {
	u64 from;
	u64 to;
	u64 misc;
};
90
91static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
92 unsigned char *buf, size_t len)
93{
94 struct branch *branch;
95 size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
96 const char *color = PERF_COLOR_BLUE;
97
98 color_fprintf(stdout, color,
99 ". ... Intel BTS data: size %zu bytes\n",
100 len);
101
102 while (len) {
103 if (len >= br_sz)
104 sz = br_sz;
105 else
106 sz = len;
107 printf(".");
108 color_fprintf(stdout, color, " %08x: ", pos);
109 for (i = 0; i < sz; i++)
110 color_fprintf(stdout, color, " %02x", buf[i]);
111 for (; i < br_sz; i++)
112 color_fprintf(stdout, color, " ");
113 if (len >= br_sz) {
114 branch = (struct branch *)buf;
115 color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
116 le64_to_cpu(branch->from),
117 le64_to_cpu(branch->to),
118 le64_to_cpu(branch->misc) & 0x10 ?
119 "pred" : "miss");
120 } else {
121 color_fprintf(stdout, color, " Bad record!\n");
122 }
123 pos += sz;
124 buf += sz;
125 len -= sz;
126 }
127}
128
/* Start the dump on a fresh line, then print the raw BTS records. */
static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
				 size_t len)
{
	fputs(".\n", stdout);
	intel_bts_dump(bts, buf, len);
}
135
136static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
137{
138 union perf_event event;
139 int err;
140
141 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
142 INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
143 sample->tid, 0, "Lost trace data");
144
145 err = perf_session__deliver_synth_event(bts->session, &event, NULL);
146 if (err)
147 pr_err("Intel BTS: failed to deliver error event, error %d\n",
148 err);
149
150 return err;
151}
152
153static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
154 unsigned int queue_nr)
155{
156 struct intel_bts_queue *btsq;
157
158 btsq = zalloc(sizeof(struct intel_bts_queue));
159 if (!btsq)
160 return NULL;
161
162 btsq->bts = bts;
163 btsq->queue_nr = queue_nr;
164 btsq->pid = -1;
165 btsq->tid = -1;
166 btsq->cpu = -1;
167
168 return btsq;
169}
170
/*
 * Prepare one auxtrace queue for decoding: lazily allocate its
 * intel_bts_queue and, when not in sampling mode, add the queue to the
 * timestamp-ordered heap keyed by its first buffer's reference.
 * Returns 0 on success, negative errno on failure.
 */
static int intel_bts_setup_queue(struct intel_bts *bts,
				 struct auxtrace_queue *queue,
				 unsigned int queue_nr)
{
	struct intel_bts_queue *btsq = queue->priv;

	/* Nothing to do for a queue with no buffered data */
	if (list_empty(&queue->head))
		return 0;

	if (!btsq) {
		btsq = intel_bts_alloc_queue(bts, queue_nr);
		if (!btsq)
			return -ENOMEM;
		queue->priv = btsq;

		if (queue->cpu != -1)
			btsq->cpu = queue->cpu;
		btsq->tid = queue->tid;
	}

	/* Sampling-mode queues are not processed via the heap */
	if (bts->sampling_mode)
		return 0;

	if (!btsq->on_heap && !btsq->buffer) {
		int ret;

		btsq->buffer = auxtrace_buffer__next(queue, NULL);
		if (!btsq->buffer)
			return 0;

		/* Order queues by the first buffer's timestamp reference */
		ret = auxtrace_heap__add(&bts->heap, queue_nr,
					 btsq->buffer->reference);
		if (ret)
			return ret;
		btsq->on_heap = true;
	}

	return 0;
}
210
211static int intel_bts_setup_queues(struct intel_bts *bts)
212{
213 unsigned int i;
214 int ret;
215
216 for (i = 0; i < bts->queues.nr_queues; i++) {
217 ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
218 i);
219 if (ret)
220 return ret;
221 }
222 return 0;
223}
224
225static inline int intel_bts_update_queues(struct intel_bts *bts)
226{
227 if (bts->queues.new_data) {
228 bts->queues.new_data = false;
229 return intel_bts_setup_queues(bts);
230 }
231 return 0;
232}
233
234static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
235 unsigned char *buf_b, size_t len_b)
236{
237 size_t offs, len;
238
239 if (len_a > len_b)
240 offs = len_a - len_b;
241 else
242 offs = 0;
243
244 for (; offs < len_a; offs += sizeof(struct branch)) {
245 len = len_a - offs;
246 if (!memcmp(buf_a + offs, buf_b, len))
247 return buf_b + len;
248 }
249
250 return buf_b;
251}
252
/*
 * In snapshot mode consecutive buffers can contain overlapping data.
 * Trim buffer 'b' to start after the region it shares with the previous
 * buffer, recording the trimmed view in b->use_data/b->use_size.
 * Returns 0 on success (including "no previous buffer"); the -EINVAL
 * path is defensive - intel_bts_find_overlap() as written never
 * returns NULL.
 */
static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
				    struct auxtrace_buffer *b)
{
	struct auxtrace_buffer *a;
	void *start;

	/* First buffer in the queue: nothing to compare against */
	if (b->list.prev == &queue->head)
		return 0;
	a = list_entry(b->list.prev, struct auxtrace_buffer, list);
	start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
	if (!start)
		return -EINVAL;
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	return 0;
}
269
/*
 * Synthesize a PERF_RECORD_SAMPLE for one BTS branch record and deliver
 * it to the session.  When injecting (synth_opts.inject), the sample
 * payload is also serialized into the event so it can be written out
 * directly.  Returns 0 on success, non-zero on failure.
 */
static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
					 struct branch *branch)
{
	int ret;
	struct intel_bts *bts = btsq->bts;
	union perf_event event;
	struct perf_sample sample = { .ip = 0, };

	event.sample.header.type = PERF_RECORD_SAMPLE;
	event.sample.header.misc = PERF_RECORD_MISC_USER;
	event.sample.header.size = sizeof(struct perf_event_header);

	/* ip/addr are the branch source/destination (on-disk little-endian) */
	sample.ip = le64_to_cpu(branch->from);
	sample.pid = btsq->pid;
	sample.tid = btsq->tid;
	sample.addr = le64_to_cpu(branch->to);
	sample.id = btsq->bts->branches_id;
	sample.stream_id = btsq->bts->branches_id;
	sample.period = 1;
	sample.cpu = btsq->cpu;
	/* Flags and insn length were set by intel_bts_get_branch_type() */
	sample.flags = btsq->sample_flags;
	sample.insn_len = btsq->intel_pt_insn.length;

	if (bts->synth_opts.inject) {
		event.sample.header.size = bts->branches_event_size;
		ret = perf_event__synthesize_sample(&event,
						    bts->branches_sample_type,
						    0, &sample,
						    bts->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
	if (ret)
		pr_err("Intel BTS: failed to deliver branch event, error %d\n",
		       ret);

	return ret;
}
310
/*
 * Decode the instruction at 'ip' in this queue's thread into
 * btsq->intel_pt_insn, using the Intel PT instruction decoder.
 * Returns 0 on success, -1 if the thread, map, or instruction bytes
 * could not be obtained.
 */
static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
{
	struct machine *machine = btsq->bts->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[1024];
	size_t bufsz;
	ssize_t len;
	int x86_64;
	uint8_t cpumode;
	int err = -1;

	/* NOTE(review): assumes intel_pt_insn_max_size() <= sizeof(buf) -
	 * confirm against the decoder */
	bufsz = intel_pt_insn_max_size();

	if (machine__kernel_ip(machine, ip))
		cpumode = PERF_RECORD_MISC_KERNEL;
	else
		cpumode = PERF_RECORD_MISC_USER;

	thread = machine__find_thread(machine, -1, btsq->tid);
	if (!thread)
		return -1;

	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
	if (!al.map || !al.map->dso)
		goto out_put;

	len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz);
	if (len <= 0)
		goto out_put;

	/* Load maps to ensure dso->is_64_bit has been updated */
	map__load(al.map, machine->symbol_filter);

	x86_64 = al.map->dso->is_64_bit;

	if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
		goto out_put;

	err = 0;
out_put:
	thread__put(thread);
	return err;
}
355
356static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
357 pid_t tid, u64 ip)
358{
359 union perf_event event;
360 int err;
361
362 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
363 INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
364 "Failed to get instruction");
365
366 err = perf_session__deliver_synth_event(bts->session, &event, NULL);
367 if (err)
368 pr_err("Intel BTS: failed to deliver error event, error %d\n",
369 err);
370
371 return err;
372}
373
/*
 * Classify a branch record into btsq->sample_flags (PERF_IP_FLAG_*) and
 * decode its source instruction into btsq->intel_pt_insn.  A zero
 * 'from' or 'to' marks a trace begin/end record rather than a real
 * branch.  If the source instruction cannot be read, an error event is
 * synthesized when synth_opts.errors is set.
 * Returns 0, or the result of synthesizing the error event.
 */
static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
				     struct branch *branch)
{
	int err;

	if (!branch->from) {
		if (branch->to)
			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_TRACE_BEGIN;
		else
			btsq->sample_flags = 0;
		btsq->intel_pt_insn.length = 0;
	} else if (!branch->to) {
		btsq->sample_flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		btsq->intel_pt_insn.length = 0;
	} else {
		err = intel_bts_get_next_insn(btsq, branch->from);
		if (err) {
			btsq->sample_flags = 0;
			btsq->intel_pt_insn.length = 0;
			if (!btsq->bts->synth_opts.errors)
				return 0;
			err = intel_bts_synth_error(btsq->bts, btsq->cpu,
						    btsq->pid, btsq->tid,
						    branch->from);
			return err;
		}
		btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
		/* Check for an async branch into the kernel */
		if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
		    machine__kernel_ip(btsq->bts->machine, branch->to) &&
		    btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_CALL |
					   PERF_IP_FLAG_SYSCALLRET))
			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_CALL |
					     PERF_IP_FLAG_ASYNC |
					     PERF_IP_FLAG_INTERRUPT;
	}

	return 0;
}
417
/*
 * Walk the BTS records in a buffer, classify each branch and synthesize
 * branch samples, applying bts->branches_filter when set.
 * Returns 0 or the first synthesis error.
 */
static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
				    struct auxtrace_buffer *buffer)
{
	struct branch *branch;
	size_t sz, bsz = sizeof(struct branch);
	u32 filter = btsq->bts->branches_filter;
	int err = 0;

	/* Use the overlap-trimmed view of the data when present */
	if (buffer->use_data) {
		sz = buffer->use_size;
		branch = buffer->use_data;
	} else {
		sz = buffer->size;
		branch = buffer->data;
	}

	if (!btsq->bts->sample_branches)
		return 0;

	/*
	 * NOTE(review): 'sz > bsz' skips a final record that exactly fills
	 * the buffer ('sz >= bsz' would process it) - confirm whether the
	 * last record is deliberately ignored.
	 */
	for (; sz > bsz; branch += 1, sz -= bsz) {
		/* An all-zero record carries no branch */
		if (!branch->from && !branch->to)
			continue;
		intel_bts_get_branch_type(btsq, branch);
		if (filter && !(filter & btsq->sample_flags))
			continue;
		err = intel_bts_synth_branch_sample(btsq, branch);
		if (err)
			break;
	}
	return err;
}
449
/*
 * Process the next buffer of one queue.
 * Returns 0 if a buffer was processed and more remain (with *timestamp
 * set to the next buffer's reference for heap re-insertion), 1 if the
 * queue is exhausted or done, negative errno on error.
 */
static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
{
	struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
	struct auxtrace_queue *queue;
	struct thread *thread;
	int err;

	if (btsq->done)
		return 1;

	/* Resolve pid from tid on first use; thereafter look up by both */
	if (btsq->pid == -1) {
		thread = machine__find_thread(btsq->bts->machine, -1,
					      btsq->tid);
		if (thread)
			btsq->pid = thread->pid_;
	} else {
		thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
						 btsq->tid);
	}

	queue = &btsq->bts->queues.queue_array[btsq->queue_nr];

	if (!buffer)
		buffer = auxtrace_buffer__next(queue, NULL);

	if (!buffer) {
		if (!btsq->bts->sampling_mode)
			btsq->done = 1;
		err = 1;
		goto out_put;
	}

	/* Currently there is no support for split buffers */
	if (buffer->consecutive) {
		err = -EINVAL;
		goto out_put;
	}

	/* Map in the buffer data on demand */
	if (!buffer->data) {
		int fd = perf_data_file__fd(btsq->bts->session->file);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data) {
			err = -ENOMEM;
			goto out_put;
		}
	}

	if (btsq->bts->snapshot_mode && !buffer->consecutive &&
	    intel_bts_do_fix_overlap(queue, buffer)) {
		err = -ENOMEM;
		goto out_put;
	}

	/* Mark a new trace burst on the thread's stack when discontinuous */
	if (!btsq->bts->synth_opts.callchain && thread &&
	    (!old_buffer || btsq->bts->sampling_mode ||
	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
		thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);

	err = intel_bts_process_buffer(btsq, buffer);

	auxtrace_buffer__drop_data(buffer);

	/* Advance to the next buffer and report its timestamp, if any */
	btsq->buffer = auxtrace_buffer__next(queue, buffer);
	if (btsq->buffer) {
		if (timestamp)
			*timestamp = btsq->buffer->reference;
	} else {
		if (!btsq->bts->sampling_mode)
			btsq->done = 1;
	}
out_put:
	/* NOTE(review): thread may be NULL here - presumably thread__put()
	 * tolerates NULL; confirm */
	thread__put(thread);
	return err;
}
525
526static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
527{
528 u64 ts = 0;
529 int ret;
530
531 while (1) {
532 ret = intel_bts_process_queue(btsq, &ts);
533 if (ret < 0)
534 return ret;
535 if (ret)
536 break;
537 }
538 return 0;
539}
540
541static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
542{
543 struct auxtrace_queues *queues = &bts->queues;
544 unsigned int i;
545
546 for (i = 0; i < queues->nr_queues; i++) {
547 struct auxtrace_queue *queue = &bts->queues.queue_array[i];
548 struct intel_bts_queue *btsq = queue->priv;
549
550 if (btsq && btsq->tid == tid)
551 return intel_bts_flush_queue(btsq);
552 }
553 return 0;
554}
555
/*
 * Process, in timestamp order via the heap, every queue whose next data
 * is at or before 'timestamp'.  Returns 0 when caught up, negative
 * errno on error.
 */
static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
{
	while (1) {
		unsigned int queue_nr;
		struct auxtrace_queue *queue;
		struct intel_bts_queue *btsq;
		u64 ts = 0;
		int ret;

		if (!bts->heap.heap_cnt)
			return 0;

		/* Heap root is the earliest pending queue; stop when it is
		 * beyond the requested timestamp */
		if (bts->heap.heap_array[0].ordinal > timestamp)
			return 0;

		queue_nr = bts->heap.heap_array[0].queue_nr;
		queue = &bts->queues.queue_array[queue_nr];
		btsq = queue->priv;

		auxtrace_heap__pop(&bts->heap);

		ret = intel_bts_process_queue(btsq, &ts);
		if (ret < 0) {
			/* Put the queue back so it is not lost on error */
			auxtrace_heap__add(&bts->heap, queue_nr, ts);
			return ret;
		}

		/* ret == 0: more data - re-add with the new timestamp;
		 * ret > 0: queue exhausted - drop it from the heap */
		if (!ret) {
			ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			btsq->on_heap = false;
		}
	}

	/* Not reached: the loop only exits via return */
	return 0;
}
594
595static int intel_bts_process_event(struct perf_session *session,
596 union perf_event *event,
597 struct perf_sample *sample,
598 struct perf_tool *tool)
599{
600 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
601 auxtrace);
602 u64 timestamp;
603 int err;
604
605 if (dump_trace)
606 return 0;
607
608 if (!tool->ordered_events) {
609 pr_err("Intel BTS requires ordered events\n");
610 return -EINVAL;
611 }
612
613 if (sample->time && sample->time != (u64)-1)
614 timestamp = perf_time_to_tsc(sample->time, &bts->tc);
615 else
616 timestamp = 0;
617
618 err = intel_bts_update_queues(bts);
619 if (err)
620 return err;
621
622 err = intel_bts_process_queues(bts, timestamp);
623 if (err)
624 return err;
625 if (event->header.type == PERF_RECORD_EXIT) {
626 err = intel_bts_process_tid_exit(bts, event->comm.tid);
627 if (err)
628 return err;
629 }
630
631 if (event->header.type == PERF_RECORD_AUX &&
632 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
633 bts->synth_opts.errors)
634 err = intel_bts_lost(bts, sample);
635
636 return err;
637}
638
639static int intel_bts_process_auxtrace_event(struct perf_session *session,
640 union perf_event *event,
641 struct perf_tool *tool __maybe_unused)
642{
643 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
644 auxtrace);
645
646 if (bts->sampling_mode)
647 return 0;
648
649 if (!bts->data_queued) {
650 struct auxtrace_buffer *buffer;
651 off_t data_offset;
652 int fd = perf_data_file__fd(session->file);
653 int err;
654
655 if (perf_data_file__is_pipe(session->file)) {
656 data_offset = 0;
657 } else {
658 data_offset = lseek(fd, 0, SEEK_CUR);
659 if (data_offset == -1)
660 return -errno;
661 }
662
663 err = auxtrace_queues__add_event(&bts->queues, session, event,
664 data_offset, &buffer);
665 if (err)
666 return err;
667
668 /* Dump here now we have copied a piped trace out of the pipe */
669 if (dump_trace) {
670 if (auxtrace_buffer__get_data(buffer, fd)) {
671 intel_bts_dump_event(bts, buffer->data,
672 buffer->size);
673 auxtrace_buffer__put_data(buffer);
674 }
675 }
676 }
677
678 return 0;
679}
680
681static int intel_bts_flush(struct perf_session *session __maybe_unused,
682 struct perf_tool *tool __maybe_unused)
683{
684 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
685 auxtrace);
686 int ret;
687
688 if (dump_trace || bts->sampling_mode)
689 return 0;
690
691 if (!tool->ordered_events)
692 return -EINVAL;
693
694 ret = intel_bts_update_queues(bts);
695 if (ret < 0)
696 return ret;
697
698 return intel_bts_process_queues(bts, MAX_TIMESTAMP);
699}
700
/*
 * Destructor for an auxtrace_queue's private data.  free(NULL) is a no-op,
 * so the explicit NULL guard the original carried is unnecessary.
 */
static void intel_bts_free_queue(void *priv)
{
	free(priv);
}
709
710static void intel_bts_free_events(struct perf_session *session)
711{
712 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
713 auxtrace);
714 struct auxtrace_queues *queues = &bts->queues;
715 unsigned int i;
716
717 for (i = 0; i < queues->nr_queues; i++) {
718 intel_bts_free_queue(queues->queue_array[i].priv);
719 queues->queue_array[i].priv = NULL;
720 }
721 auxtrace_queues__free(queues);
722}
723
/*
 * Full teardown of the BTS decoder state attached to @session.
 * Order matters: free the heap and per-queue state before detaching
 * and freeing the containing intel_bts object.
 */
static void intel_bts_free(struct perf_session *session)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);

	auxtrace_heap__free(&bts->heap);
	intel_bts_free_events(session);
	session->auxtrace = NULL;
	free(bts);
}
734
/*
 * Wrapper used when synthesizing attribute events: the dummy perf_tool is
 * passed to perf_event__synthesize_attr() and the callback recovers the
 * session via container_of() (see intel_bts_event_synth()).
 */
struct intel_bts_synth {
	struct perf_tool dummy_tool;	/* must be first for container_of */
	struct perf_session *session;	/* session to deliver events to */
};
739
740static int intel_bts_event_synth(struct perf_tool *tool,
741 union perf_event *event,
742 struct perf_sample *sample __maybe_unused,
743 struct machine *machine __maybe_unused)
744{
745 struct intel_bts_synth *intel_bts_synth =
746 container_of(tool, struct intel_bts_synth, dummy_tool);
747
748 return perf_session__deliver_synth_event(intel_bts_synth->session,
749 event, NULL);
750}
751
752static int intel_bts_synth_event(struct perf_session *session,
753 struct perf_event_attr *attr, u64 id)
754{
755 struct intel_bts_synth intel_bts_synth;
756
757 memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
758 intel_bts_synth.session = session;
759
760 return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
761 &id, intel_bts_event_synth);
762}
763
/*
 * Set up synthesized-event attributes based on the first selected evsel
 * that produced Intel BTS data.  Currently only 'branches' samples are
 * synthesized (when requested via synth_opts).
 */
static int intel_bts_synth_events(struct intel_bts *bts,
				  struct perf_session *session)
{
	struct perf_evlist *evlist = session->evlist;
	struct perf_evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	/* Find an evsel of the BTS PMU type that has allocated ids. */
	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type == bts->pmu_type && evsel->ids) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("There are no selected events with Intel BTS data\n");
		return 0;
	}

	/*
	 * Base the synthesized attr on the selected evsel, but force
	 * IP/TID/PERIOD and drop TIME/CPU (BTS has no per-branch time/cpu).
	 */
	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
	attr.exclude_user = evsel->attr.exclude_user;
	attr.exclude_kernel = evsel->attr.exclude_kernel;
	attr.exclude_hv = evsel->attr.exclude_hv;
	attr.exclude_host = evsel->attr.exclude_host;
	attr.exclude_guest = evsel->attr.exclude_guest;
	attr.sample_id_all = evsel->attr.sample_id_all;
	attr.read_format = evsel->attr.read_format;

	/* Offset by 1e9 to avoid clashing with existing sample ids. */
	id = evsel->id[0] + 1000000000;
	if (!id)
		id = 1;

	if (bts->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
			 id, (u64)attr.sample_type);
		err = intel_bts_synth_event(session, &attr, id);
		if (err) {
			pr_err("%s: failed to synthesize 'branches' event type\n",
			       __func__);
			return err;
		}
		bts->sample_branches = true;
		bts->branches_sample_type = attr.sample_type;
		bts->branches_id = id;
		/*
		 * We only use sample types from PERF_SAMPLE_MASK so we can use
		 * __perf_evsel__sample_size() here.
		 */
		bts->branches_event_size = sizeof(struct sample_event) +
				__perf_evsel__sample_size(attr.sample_type);
	}

	bts->synth_needs_swap = evsel->needs_swap;

	return 0;
}
833
834static const char * const intel_bts_info_fmts[] = {
835 [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n",
836 [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
837 [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
838 [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n",
839 [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
840 [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
841};
842
843static void intel_bts_print_info(u64 *arr, int start, int finish)
844{
845 int i;
846
847 if (!dump_trace)
848 return;
849
850 for (i = start; i <= finish; i++)
851 fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
852}
853
/*
 * Scratch storage for BTS auxtrace-info private data.
 * NOTE(review): INTEL_BTS_AUXTRACE_PRIV_SIZE is a byte count
 * (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64)) used here as a u64 element
 * count, so this array is 8x larger than the number of priv slots -
 * confirm whether that over-allocation is intentional.
 */
u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];
855
/*
 * Handle the PERF_RECORD_AUXTRACE_INFO event: validate the payload,
 * allocate and populate the intel_bts decoder state, register the
 * auxtrace callbacks on the session, and (unless just dumping) set up
 * synthesis options and queue any already-indexed trace data.
 *
 * Returns 0 on success or a negative errno.
 */
int intel_bts_process_auxtrace_info(union perf_event *event,
				    struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	/* Up to and including the last priv[] slot we consume. */
	size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
	struct intel_bts *bts;
	int err;

	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
					min_sz)
		return -EINVAL;

	bts = zalloc(sizeof(struct intel_bts));
	if (!bts)
		return -ENOMEM;

	err = auxtrace_queues__init(&bts->queues);
	if (err)
		goto err_free;

	/* Copy the recording-time parameters out of the event payload. */
	bts->session = session;
	bts->machine = &session->machines.host; /* No kvm support */
	bts->auxtrace_type = auxtrace_info->type;
	bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
	bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
	bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
	bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
	bts->cap_user_time_zero =
			auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
	bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];

	bts->sampling_mode = false;

	/* Hook this decoder into the session's auxtrace callbacks. */
	bts->auxtrace.process_event = intel_bts_process_event;
	bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
	bts->auxtrace.flush_events = intel_bts_flush;
	bts->auxtrace.free_events = intel_bts_free_events;
	bts->auxtrace.free = intel_bts_free;
	session->auxtrace = &bts->auxtrace;

	intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
			     INTEL_BTS_SNAPSHOT_MODE);

	/* When only dumping, no synthesis/queueing setup is needed. */
	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		bts->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&bts->synth_opts);

	/* Translate the call/return options into branch filter flags. */
	if (bts->synth_opts.calls)
		bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_TRACE_END;
	if (bts->synth_opts.returns)
		bts->branches_filter |= PERF_IP_FLAG_RETURN |
					PERF_IP_FLAG_TRACE_BEGIN;

	err = intel_bts_synth_events(bts, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&bts->queues, session);
	if (err)
		goto err_free_queues;

	if (bts->queues.populated)
		bts->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&bts->queues);
	session->auxtrace = NULL;
err_free:
	free(bts);
	return err;
}
diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h
new file mode 100644
index 000000000000..ca65e21b3e83
--- /dev/null
+++ b/tools/perf/util/intel-bts.h
@@ -0,0 +1,43 @@
/*
 * intel-bts.h: Intel Branch Trace Store (BTS) support
 * Copyright (c) 2013-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#ifndef INCLUDE__PERF_INTEL_BTS_H__
#define INCLUDE__PERF_INTEL_BTS_H__

#define INTEL_BTS_PMU_NAME "intel_bts"

/*
 * Indices into the auxtrace_info priv[] array shared between the recording
 * side and intel_bts_process_auxtrace_info().
 */
enum {
	INTEL_BTS_PMU_TYPE,		/* PMU type number */
	INTEL_BTS_TIME_SHIFT,		/* TSC conversion: shift */
	INTEL_BTS_TIME_MULT,		/* TSC conversion: multiplier */
	INTEL_BTS_TIME_ZERO,		/* TSC conversion: zero point */
	INTEL_BTS_CAP_USER_TIME_ZERO,	/* cap_user_time_zero was set */
	INTEL_BTS_SNAPSHOT_MODE,	/* recorded in snapshot mode */
	INTEL_BTS_AUXTRACE_PRIV_MAX,
};

/* Size in bytes of the priv[] area consumed by the entries above. */
#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64))

struct auxtrace_record;
struct perf_tool;
union perf_event;
struct perf_session;

struct auxtrace_record *intel_bts_recording_init(int *err);

int intel_bts_process_auxtrace_info(union perf_event *event,
				    struct perf_session *session);

#endif
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 2a4a4120473b..a5acd2fe2447 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1450,7 +1450,7 @@ static int intel_pt_process_event(struct perf_session *session,
1450 return -EINVAL; 1450 return -EINVAL;
1451 } 1451 }
1452 1452
1453 if (sample->time) 1453 if (sample->time && sample->time != (u64)-1)
1454 timestamp = perf_time_to_tsc(sample->time, &pt->tc); 1454 timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1455 else 1455 else
1456 timestamp = 0; 1456 timestamp = 0;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index ce37e95bc513..b1c475d9b240 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -348,9 +348,18 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
348 return dso__find_symbol_by_name(map->dso, map->type, name); 348 return dso__find_symbol_by_name(map->dso, map->type, name);
349} 349}
350 350
351struct map *map__clone(struct map *map) 351struct map *map__clone(struct map *from)
352{ 352{
353 return memdup(map, sizeof(*map)); 353 struct map *map = memdup(from, sizeof(*map));
354
355 if (map != NULL) {
356 atomic_set(&map->refcnt, 1);
357 RB_CLEAR_NODE(&map->rb_node);
358 dso__get(map->dso);
359 map->groups = NULL;
360 }
361
362 return map;
354} 363}
355 364
356int map__overlap(struct map *l, struct map *r) 365int map__overlap(struct map *l, struct map *r)
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 3c71138e7672..89c91a1a67e7 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -462,10 +462,6 @@ static struct perf_pmu *pmu_lookup(const char *name)
462 LIST_HEAD(aliases); 462 LIST_HEAD(aliases);
463 __u32 type; 463 __u32 type;
464 464
465 /* No support for intel_bts so disallow it */
466 if (!strcmp(name, "intel_bts"))
467 return NULL;
468
469 /* 465 /*
470 * The pmu data we store & need consists of the pmu 466 * The pmu data we store & need consists of the pmu
471 * type value and format definitions. Load both right 467 * type value and format definitions. Load both right
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index fe4941a94a25..f07374bc9c5a 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -705,9 +705,10 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
705 } 705 }
706 /* Error path : ntevs < 0 */ 706 /* Error path : ntevs < 0 */
707 pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); 707 pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
708 if (ntevs == -EBADF) { 708 if (ntevs < 0) {
709 pr_warning("Warning: No dwarf info found in the vmlinux - " 709 if (ntevs == -EBADF)
710 "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); 710 pr_warning("Warning: No dwarf info found in the vmlinux - "
711 "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
711 if (!need_dwarf) { 712 if (!need_dwarf) {
712 pr_debug("Trying to use symbols.\n"); 713 pr_debug("Trying to use symbols.\n");
713 return 0; 714 return 0;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 725640fd7cd8..42e98ab5a9bb 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1138,8 +1138,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1138 1138
1139 fd = open(kcore_filename, O_RDONLY); 1139 fd = open(kcore_filename, O_RDONLY);
1140 if (fd < 0) { 1140 if (fd < 0) {
1141 pr_err("%s requires CAP_SYS_RAWIO capability to access.\n", 1141 pr_debug("Failed to open %s. Note /proc/kcore requires CAP_SYS_RAWIO capability to access.\n",
1142 kcore_filename); 1142 kcore_filename);
1143 return -EINVAL; 1143 return -EINVAL;
1144 } 1144 }
1145 1145