aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdrian Hunter <adrian.hunter@intel.com>2015-07-17 12:33:43 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2015-08-21 10:34:10 -0400
commitd0170af7004dce9cd90b749842c37e379476cbc8 (patch)
treee30cf4c15b6b565cfe969b1613e297343b5b2932
parent6f56e9cf581c6cedcaea3eb69444b169867ccf3d (diff)
perf tools: Add Intel BTS support
Intel BTS support fits within the new auxtrace infrastructure. Recording is supporting by identifying the Intel BTS PMU, parsing options and setting up events. Decoding is supported by queuing up trace data by thread and then decoding synchronously delivering synthesized event samples into the session processing for tools to consume. Committer note: E.g: [root@felicio ~]# perf record --per-thread -e intel_bts// ls anaconda-ks.cfg apctest.output bin kernel-rt-3.10.0-298.rt56.171.el7.x86_64.rpm libexec lock_page.bpf.c perf.data perf.data.old [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 4.367 MB perf.data ] [root@felicio ~]# perf evlist -v intel_bts//: type: 6, size: 112, { sample_period, sample_freq }: 1, sample_type: IP|TID|IDENTIFIER, read_format: ID, disabled: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1 dummy:u: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1, sample_type: IP|TID|IDENTIFIER, read_format: ID, disabled: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1 [root@felicio ~]# perf script # the navigate in the pager to some interesting place: ls 1843 1 branches: ffffffff810a60cb flush_signal_handlers ([kernel.kallsyms]) => ffffffff8121a522 setup_new_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8121a529 setup_new_exec ([kernel.kallsyms]) => ffffffff8122fa30 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa5d do_close_on_exec ([kernel.kallsyms]) => ffffffff81767ae0 _raw_spin_lock ([kernel.kallsyms]) ls 1843 1 branches: ffffffff81767af4 _raw_spin_lock ([kernel.kallsyms]) => ffffffff8122fa62 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 
1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fac9 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fad2 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fadd do_close_on_exec ([kernel.kallsyms]) => ffffffff8120fc80 filp_close ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8120fcaf filp_close ([kernel.kallsyms]) => ffffffff8120fcb6 filp_close ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8120fcc2 filp_close ([kernel.kallsyms]) => ffffffff812547f0 dnotify_flush ([kernel.kallsyms]) ls 1843 1 branches: ffffffff81254823 dnotify_flush ([kernel.kallsyms]) => ffffffff8120fcc7 filp_close ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8120fccd filp_close ([kernel.kallsyms]) => 
ffffffff81261790 locks_remove_posix ([kernel.kallsyms]) ls 1843 1 branches: ffffffff812617a3 locks_remove_posix ([kernel.kallsyms]) => ffffffff812617b9 locks_remove_posix ([kernel.kallsyms]) ls 1843 1 branches: ffffffff812617b9 locks_remove_posix ([kernel.kallsyms]) => ffffffff8120fcd2 filp_close ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8120fcd5 filp_close ([kernel.kallsyms]) => ffffffff812142c0 fput ([kernel.kallsyms]) ls 1843 1 branches: ffffffff812142d6 fput ([kernel.kallsyms]) => ffffffff812142df fput ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8121430c fput ([kernel.kallsyms]) => ffffffff810b6580 task_work_add ([kernel.kallsyms]) ls 1843 1 branches: ffffffff810b65ad task_work_add ([kernel.kallsyms]) => ffffffff810b65b1 task_work_add ([kernel.kallsyms]) ls 1843 1 branches: ffffffff810b65c1 task_work_add ([kernel.kallsyms]) => ffffffff810bc710 kick_process ([kernel.kallsyms]) ls 1843 1 branches: ffffffff810bc725 kick_process ([kernel.kallsyms]) => ffffffff810bc742 kick_process ([kernel.kallsyms]) ls 1843 1 branches: ffffffff810bc742 kick_process ([kernel.kallsyms]) => ffffffff810b65c6 task_work_add ([kernel.kallsyms]) ls 1843 1 branches: ffffffff810b65c9 task_work_add ([kernel.kallsyms]) => ffffffff81214311 fput ([kernel.kallsyms]) Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/1437150840-31811-9-git-send-email-adrian.hunter@intel.com [ Merged sample->time fix for bug found after first round of testing on slightly older kernel ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/Documentation/intel-bts.txt86
-rw-r--r--tools/perf/arch/x86/util/Build1
-rw-r--r--tools/perf/arch/x86/util/auxtrace.c49
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c458
-rw-r--r--tools/perf/arch/x86/util/pmu.c3
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/auxtrace.c3
-rw-r--r--tools/perf/util/auxtrace.h1
-rw-r--r--tools/perf/util/intel-bts.c933
-rw-r--r--tools/perf/util/intel-bts.h43
-rw-r--r--tools/perf/util/pmu.c4
11 files changed, 1576 insertions, 6 deletions
diff --git a/tools/perf/Documentation/intel-bts.txt b/tools/perf/Documentation/intel-bts.txt
new file mode 100644
index 000000000000..8bdc93bd7fdb
--- /dev/null
+++ b/tools/perf/Documentation/intel-bts.txt
@@ -0,0 +1,86 @@
1Intel Branch Trace Store
2========================
3
4Overview
5========
6
7Intel BTS could be regarded as a predecessor to Intel PT and has some
8similarities because it can also identify every branch a program takes. A
9notable difference is that Intel BTS has no timing information and as a
10consequence the present implementation is limited to per-thread recording.
11
12While decoding Intel BTS does not require walking the object code, the object
13code is still needed to pair up calls and returns correctly, consequently much
14of the Intel PT documentation applies also to Intel BTS. Refer to the Intel PT
15documentation and consider that the PMU 'intel_bts' can usually be used in
16place of 'intel_pt' in the examples provided, with the proviso that per-thread
17recording must also be stipulated i.e. the --per-thread option for
18'perf record'.
19
20
21perf record
22===========
23
24new event
25---------
26
27The Intel BTS kernel driver creates a new PMU for Intel BTS. The perf record
28option is:
29
30 -e intel_bts//
31
32Currently Intel BTS is limited to per-thread tracing so the --per-thread option
33is also needed.
34
35
36snapshot option
37---------------
38
39The snapshot option is the same as Intel PT (refer Intel PT documentation).
40
41
42auxtrace mmap size option
43-------------------------
44
45The mmap size option is the same as Intel PT (refer Intel PT documentation).
46
47
48perf script
49===========
50
51By default, perf script will decode trace data found in the perf.data file.
52This can be further controlled by option --itrace. The --itrace option is
53the same as Intel PT (refer Intel PT documentation) except that neither
54"instructions" events nor "transactions" events (and consequently call
55chains) are supported.
56
57To disable trace decoding entirely, use the option --no-itrace.
58
59
60dump option
61-----------
62
63perf script has an option (-D) to "dump" the events i.e. display the binary
64data.
65
66When -D is used, Intel BTS packets are displayed.
67
68To disable the display of Intel BTS packets, combine the -D option with
69--no-itrace.
70
71
72perf report
73===========
74
75By default, perf report will decode trace data found in the perf.data file.
76This can be further controlled by new option --itrace exactly the same as
77perf script.
78
79
80perf inject
81===========
82
83perf inject also accepts the --itrace option in which case tracing data is
84removed and replaced with the synthesized events. e.g.
85
86 perf inject --itrace -i perf.data -o perf.data.new
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index a8be9f9d0462..2c55e1b336c5 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -10,3 +10,4 @@ libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
10 10
11libperf-$(CONFIG_AUXTRACE) += auxtrace.o 11libperf-$(CONFIG_AUXTRACE) += auxtrace.o
12libperf-$(CONFIG_AUXTRACE) += intel-pt.o 12libperf-$(CONFIG_AUXTRACE) += intel-pt.o
13libperf-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index e7654b506312..7a7805583e3f 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -13,11 +13,56 @@
13 * 13 *
14 */ 14 */
15 15
16#include <stdbool.h>
17
16#include "../../util/header.h" 18#include "../../util/header.h"
19#include "../../util/debug.h"
20#include "../../util/pmu.h"
17#include "../../util/auxtrace.h" 21#include "../../util/auxtrace.h"
18#include "../../util/intel-pt.h" 22#include "../../util/intel-pt.h"
23#include "../../util/intel-bts.h"
24#include "../../util/evlist.h"
25
26static
27struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
28 int *err)
29{
30 struct perf_pmu *intel_pt_pmu;
31 struct perf_pmu *intel_bts_pmu;
32 struct perf_evsel *evsel;
33 bool found_pt = false;
34 bool found_bts = false;
35
36 intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
37 intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
38
39 if (evlist) {
40 evlist__for_each(evlist, evsel) {
41 if (intel_pt_pmu &&
42 evsel->attr.type == intel_pt_pmu->type)
43 found_pt = true;
44 if (intel_bts_pmu &&
45 evsel->attr.type == intel_bts_pmu->type)
46 found_bts = true;
47 }
48 }
49
50 if (found_pt && found_bts) {
51 pr_err("intel_pt and intel_bts may not be used together\n");
52 *err = -EINVAL;
53 return NULL;
54 }
55
56 if (found_pt)
57 return intel_pt_recording_init(err);
58
59 if (found_bts)
60 return intel_bts_recording_init(err);
19 61
20struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, 62 return NULL;
63}
64
65struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
21 int *err) 66 int *err)
22{ 67{
23 char buffer[64]; 68 char buffer[64];
@@ -32,7 +77,7 @@ struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe
32 } 77 }
33 78
34 if (!strncmp(buffer, "GenuineIntel,", 13)) 79 if (!strncmp(buffer, "GenuineIntel,", 13))
35 return intel_pt_recording_init(err); 80 return auxtrace_record__init_intel(evlist, err);
36 81
37 return NULL; 82 return NULL;
38} 83}
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
new file mode 100644
index 000000000000..9b94ce520917
--- /dev/null
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -0,0 +1,458 @@
1/*
2 * intel-bts.c: Intel BTS support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <linux/kernel.h>
17#include <linux/types.h>
18#include <linux/bitops.h>
19#include <linux/log2.h>
20
21#include "../../util/cpumap.h"
22#include "../../util/evsel.h"
23#include "../../util/evlist.h"
24#include "../../util/session.h"
25#include "../../util/util.h"
26#include "../../util/pmu.h"
27#include "../../util/debug.h"
28#include "../../util/tsc.h"
29#include "../../util/auxtrace.h"
30#include "../../util/intel-bts.h"
31
32#define KiB(x) ((x) * 1024)
33#define MiB(x) ((x) * 1024 * 1024)
34#define KiB_MASK(x) (KiB(x) - 1)
35#define MiB_MASK(x) (MiB(x) - 1)
36
37#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4)
38
39#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60)
40
41struct intel_bts_snapshot_ref {
42 void *ref_buf;
43 size_t ref_offset;
44 bool wrapped;
45};
46
47struct intel_bts_recording {
48 struct auxtrace_record itr;
49 struct perf_pmu *intel_bts_pmu;
50 struct perf_evlist *evlist;
51 bool snapshot_mode;
52 size_t snapshot_size;
53 int snapshot_ref_cnt;
54 struct intel_bts_snapshot_ref *snapshot_refs;
55};
56
57struct branch {
58 u64 from;
59 u64 to;
60 u64 misc;
61};
62
63static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused)
64{
65 return INTEL_BTS_AUXTRACE_PRIV_SIZE;
66}
67
68static int intel_bts_info_fill(struct auxtrace_record *itr,
69 struct perf_session *session,
70 struct auxtrace_info_event *auxtrace_info,
71 size_t priv_size)
72{
73 struct intel_bts_recording *btsr =
74 container_of(itr, struct intel_bts_recording, itr);
75 struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
76 struct perf_event_mmap_page *pc;
77 struct perf_tsc_conversion tc = { .time_mult = 0, };
78 bool cap_user_time_zero = false;
79 int err;
80
81 if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
82 return -EINVAL;
83
84 if (!session->evlist->nr_mmaps)
85 return -EINVAL;
86
87 pc = session->evlist->mmap[0].base;
88 if (pc) {
89 err = perf_read_tsc_conversion(pc, &tc);
90 if (err) {
91 if (err != -EOPNOTSUPP)
92 return err;
93 } else {
94 cap_user_time_zero = tc.time_mult != 0;
95 }
96 if (!cap_user_time_zero)
97 ui__warning("Intel BTS: TSC not available\n");
98 }
99
100 auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS;
101 auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type;
102 auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift;
103 auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult;
104 auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero;
105 auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero;
106 auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode;
107
108 return 0;
109}
110
111static int intel_bts_recording_options(struct auxtrace_record *itr,
112 struct perf_evlist *evlist,
113 struct record_opts *opts)
114{
115 struct intel_bts_recording *btsr =
116 container_of(itr, struct intel_bts_recording, itr);
117 struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
118 struct perf_evsel *evsel, *intel_bts_evsel = NULL;
119 const struct cpu_map *cpus = evlist->cpus;
120 bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
121
122 btsr->evlist = evlist;
123 btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
124
125 evlist__for_each(evlist, evsel) {
126 if (evsel->attr.type == intel_bts_pmu->type) {
127 if (intel_bts_evsel) {
128 pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n");
129 return -EINVAL;
130 }
131 evsel->attr.freq = 0;
132 evsel->attr.sample_period = 1;
133 intel_bts_evsel = evsel;
134 opts->full_auxtrace = true;
135 }
136 }
137
138 if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
139 pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n");
140 return -EINVAL;
141 }
142
143 if (!opts->full_auxtrace)
144 return 0;
145
146 if (opts->full_auxtrace && !cpu_map__empty(cpus)) {
147 pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
148 return -EINVAL;
149 }
150
151 /* Set default sizes for snapshot mode */
152 if (opts->auxtrace_snapshot_mode) {
153 if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
154 if (privileged) {
155 opts->auxtrace_mmap_pages = MiB(4) / page_size;
156 } else {
157 opts->auxtrace_mmap_pages = KiB(128) / page_size;
158 if (opts->mmap_pages == UINT_MAX)
159 opts->mmap_pages = KiB(256) / page_size;
160 }
161 } else if (!opts->auxtrace_mmap_pages && !privileged &&
162 opts->mmap_pages == UINT_MAX) {
163 opts->mmap_pages = KiB(256) / page_size;
164 }
165 if (!opts->auxtrace_snapshot_size)
166 opts->auxtrace_snapshot_size =
167 opts->auxtrace_mmap_pages * (size_t)page_size;
168 if (!opts->auxtrace_mmap_pages) {
169 size_t sz = opts->auxtrace_snapshot_size;
170
171 sz = round_up(sz, page_size) / page_size;
172 opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
173 }
174 if (opts->auxtrace_snapshot_size >
175 opts->auxtrace_mmap_pages * (size_t)page_size) {
176 pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
177 opts->auxtrace_snapshot_size,
178 opts->auxtrace_mmap_pages * (size_t)page_size);
179 return -EINVAL;
180 }
181 if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
182 pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
183 return -EINVAL;
184 }
185 pr_debug2("Intel BTS snapshot size: %zu\n",
186 opts->auxtrace_snapshot_size);
187 }
188
189 /* Set default sizes for full trace mode */
190 if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
191 if (privileged) {
192 opts->auxtrace_mmap_pages = MiB(4) / page_size;
193 } else {
194 opts->auxtrace_mmap_pages = KiB(128) / page_size;
195 if (opts->mmap_pages == UINT_MAX)
196 opts->mmap_pages = KiB(256) / page_size;
197 }
198 }
199
200 /* Validate auxtrace_mmap_pages */
201 if (opts->auxtrace_mmap_pages) {
202 size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
203 size_t min_sz;
204
205 if (opts->auxtrace_snapshot_mode)
206 min_sz = KiB(4);
207 else
208 min_sz = KiB(8);
209
210 if (sz < min_sz || !is_power_of_2(sz)) {
211 pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n",
212 min_sz / 1024);
213 return -EINVAL;
214 }
215 }
216
217 if (intel_bts_evsel) {
218 /*
219 * To obtain the auxtrace buffer file descriptor, the auxtrace event
220 * must come first.
221 */
222 perf_evlist__to_front(evlist, intel_bts_evsel);
223 /*
224 * In the case of per-cpu mmaps, we need the CPU on the
225 * AUX event.
226 */
227 if (!cpu_map__empty(cpus))
228 perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
229 }
230
231 /* Add dummy event to keep tracking */
232 if (opts->full_auxtrace) {
233 struct perf_evsel *tracking_evsel;
234 int err;
235
236 err = parse_events(evlist, "dummy:u", NULL);
237 if (err)
238 return err;
239
240 tracking_evsel = perf_evlist__last(evlist);
241
242 perf_evlist__set_tracking_event(evlist, tracking_evsel);
243
244 tracking_evsel->attr.freq = 0;
245 tracking_evsel->attr.sample_period = 1;
246 }
247
248 return 0;
249}
250
251static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr,
252 struct record_opts *opts,
253 const char *str)
254{
255 struct intel_bts_recording *btsr =
256 container_of(itr, struct intel_bts_recording, itr);
257 unsigned long long snapshot_size = 0;
258 char *endptr;
259
260 if (str) {
261 snapshot_size = strtoull(str, &endptr, 0);
262 if (*endptr || snapshot_size > SIZE_MAX)
263 return -1;
264 }
265
266 opts->auxtrace_snapshot_mode = true;
267 opts->auxtrace_snapshot_size = snapshot_size;
268
269 btsr->snapshot_size = snapshot_size;
270
271 return 0;
272}
273
274static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused)
275{
276 return rdtsc();
277}
278
279static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr,
280 int idx)
281{
282 const size_t sz = sizeof(struct intel_bts_snapshot_ref);
283 int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2;
284 struct intel_bts_snapshot_ref *refs;
285
286 if (!new_cnt)
287 new_cnt = 16;
288
289 while (new_cnt <= idx)
290 new_cnt *= 2;
291
292 refs = calloc(new_cnt, sz);
293 if (!refs)
294 return -ENOMEM;
295
296 memcpy(refs, btsr->snapshot_refs, cnt * sz);
297
298 btsr->snapshot_refs = refs;
299 btsr->snapshot_ref_cnt = new_cnt;
300
301 return 0;
302}
303
304static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr)
305{
306 int i;
307
308 for (i = 0; i < btsr->snapshot_ref_cnt; i++)
309 zfree(&btsr->snapshot_refs[i].ref_buf);
310 zfree(&btsr->snapshot_refs);
311}
312
313static void intel_bts_recording_free(struct auxtrace_record *itr)
314{
315 struct intel_bts_recording *btsr =
316 container_of(itr, struct intel_bts_recording, itr);
317
318 intel_bts_free_snapshot_refs(btsr);
319 free(btsr);
320}
321
322static int intel_bts_snapshot_start(struct auxtrace_record *itr)
323{
324 struct intel_bts_recording *btsr =
325 container_of(itr, struct intel_bts_recording, itr);
326 struct perf_evsel *evsel;
327
328 evlist__for_each(btsr->evlist, evsel) {
329 if (evsel->attr.type == btsr->intel_bts_pmu->type)
330 return perf_evlist__disable_event(btsr->evlist, evsel);
331 }
332 return -EINVAL;
333}
334
335static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
336{
337 struct intel_bts_recording *btsr =
338 container_of(itr, struct intel_bts_recording, itr);
339 struct perf_evsel *evsel;
340
341 evlist__for_each(btsr->evlist, evsel) {
342 if (evsel->attr.type == btsr->intel_bts_pmu->type)
343 return perf_evlist__enable_event(btsr->evlist, evsel);
344 }
345 return -EINVAL;
346}
347
348static bool intel_bts_first_wrap(u64 *data, size_t buf_size)
349{
350 int i, a, b;
351
352 b = buf_size >> 3;
353 a = b - 512;
354 if (a < 0)
355 a = 0;
356
357 for (i = a; i < b; i++) {
358 if (data[i])
359 return true;
360 }
361
362 return false;
363}
364
365static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx,
366 struct auxtrace_mmap *mm, unsigned char *data,
367 u64 *head, u64 *old)
368{
369 struct intel_bts_recording *btsr =
370 container_of(itr, struct intel_bts_recording, itr);
371 bool wrapped;
372 int err;
373
374 pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
375 __func__, idx, (size_t)*old, (size_t)*head);
376
377 if (idx >= btsr->snapshot_ref_cnt) {
378 err = intel_bts_alloc_snapshot_refs(btsr, idx);
379 if (err)
380 goto out_err;
381 }
382
383 wrapped = btsr->snapshot_refs[idx].wrapped;
384 if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) {
385 btsr->snapshot_refs[idx].wrapped = true;
386 wrapped = true;
387 }
388
389 /*
390 * In full trace mode 'head' continually increases. However in snapshot
391 * mode 'head' is an offset within the buffer. Here 'old' and 'head'
392 * are adjusted to match the full trace case which expects that 'old' is
393 * always less than 'head'.
394 */
395 if (wrapped) {
396 *old = *head;
397 *head += mm->len;
398 } else {
399 if (mm->mask)
400 *old &= mm->mask;
401 else
402 *old %= mm->len;
403 if (*old > *head)
404 *head += mm->len;
405 }
406
407 pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
408 __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
409
410 return 0;
411
412out_err:
413 pr_err("%s: failed, error %d\n", __func__, err);
414 return err;
415}
416
417static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
418{
419 struct intel_bts_recording *btsr =
420 container_of(itr, struct intel_bts_recording, itr);
421 struct perf_evsel *evsel;
422
423 evlist__for_each(btsr->evlist, evsel) {
424 if (evsel->attr.type == btsr->intel_bts_pmu->type)
425 return perf_evlist__enable_event_idx(btsr->evlist,
426 evsel, idx);
427 }
428 return -EINVAL;
429}
430
431struct auxtrace_record *intel_bts_recording_init(int *err)
432{
433 struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
434 struct intel_bts_recording *btsr;
435
436 if (!intel_bts_pmu)
437 return NULL;
438
439 btsr = zalloc(sizeof(struct intel_bts_recording));
440 if (!btsr) {
441 *err = -ENOMEM;
442 return NULL;
443 }
444
445 btsr->intel_bts_pmu = intel_bts_pmu;
446 btsr->itr.recording_options = intel_bts_recording_options;
447 btsr->itr.info_priv_size = intel_bts_info_priv_size;
448 btsr->itr.info_fill = intel_bts_info_fill;
449 btsr->itr.free = intel_bts_recording_free;
450 btsr->itr.snapshot_start = intel_bts_snapshot_start;
451 btsr->itr.snapshot_finish = intel_bts_snapshot_finish;
452 btsr->itr.find_snapshot = intel_bts_find_snapshot;
453 btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
454 btsr->itr.reference = intel_bts_reference;
455 btsr->itr.read_finish = intel_bts_read_finish;
456 btsr->itr.alignment = sizeof(struct branch);
457 return &btsr->itr;
458}
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index fd11cc3ce780..79fe07158d00 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -3,6 +3,7 @@
3#include <linux/perf_event.h> 3#include <linux/perf_event.h>
4 4
5#include "../../util/intel-pt.h" 5#include "../../util/intel-pt.h"
6#include "../../util/intel-bts.h"
6#include "../../util/pmu.h" 7#include "../../util/pmu.h"
7 8
8struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) 9struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
@@ -10,6 +11,8 @@ struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __mayb
10#ifdef HAVE_AUXTRACE_SUPPORT 11#ifdef HAVE_AUXTRACE_SUPPORT
11 if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) 12 if (!strcmp(pmu->name, INTEL_PT_PMU_NAME))
12 return intel_pt_pmu_default_config(pmu); 13 return intel_pt_pmu_default_config(pmu);
14 if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME))
15 pmu->selectable = true;
13#endif 16#endif
14 return NULL; 17 return NULL;
15} 18}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index c20473d1369e..e912856cc4e5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -80,6 +80,7 @@ libperf-y += thread-stack.o
80libperf-$(CONFIG_AUXTRACE) += auxtrace.o 80libperf-$(CONFIG_AUXTRACE) += auxtrace.o
81libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ 81libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
82libperf-$(CONFIG_AUXTRACE) += intel-pt.o 82libperf-$(CONFIG_AUXTRACE) += intel-pt.o
83libperf-$(CONFIG_AUXTRACE) += intel-bts.o
83libperf-y += parse-branch-options.o 84libperf-y += parse-branch-options.o
84 85
85libperf-$(CONFIG_LIBELF) += symbol-elf.o 86libperf-$(CONFIG_LIBELF) += symbol-elf.o
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 0f0b7e11e2d9..a980e7c50ee0 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -48,6 +48,7 @@
48#include "parse-options.h" 48#include "parse-options.h"
49 49
50#include "intel-pt.h" 50#include "intel-pt.h"
51#include "intel-bts.h"
51 52
52int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, 53int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
53 struct auxtrace_mmap_params *mp, 54 struct auxtrace_mmap_params *mp,
@@ -888,6 +889,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
888 switch (type) { 889 switch (type) {
889 case PERF_AUXTRACE_INTEL_PT: 890 case PERF_AUXTRACE_INTEL_PT:
890 return intel_pt_process_auxtrace_info(event, session); 891 return intel_pt_process_auxtrace_info(event, session);
892 case PERF_AUXTRACE_INTEL_BTS:
893 return intel_bts_process_auxtrace_info(event, session);
891 case PERF_AUXTRACE_UNKNOWN: 894 case PERF_AUXTRACE_UNKNOWN:
892 default: 895 default:
893 return -EINVAL; 896 return -EINVAL;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 7d12f33a3a06..bf72b77a588a 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -40,6 +40,7 @@ struct events_stats;
40enum auxtrace_type { 40enum auxtrace_type {
41 PERF_AUXTRACE_UNKNOWN, 41 PERF_AUXTRACE_UNKNOWN,
42 PERF_AUXTRACE_INTEL_PT, 42 PERF_AUXTRACE_INTEL_PT,
43 PERF_AUXTRACE_INTEL_BTS,
43}; 44};
44 45
45enum itrace_period_type { 46enum itrace_period_type {
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
new file mode 100644
index 000000000000..ea768625ab5b
--- /dev/null
+++ b/tools/perf/util/intel-bts.c
@@ -0,0 +1,933 @@
1/*
2 * intel-bts.c: Intel Processor Trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <endian.h>
17#include <byteswap.h>
18#include <linux/kernel.h>
19#include <linux/types.h>
20#include <linux/bitops.h>
21#include <linux/log2.h>
22
23#include "cpumap.h"
24#include "color.h"
25#include "evsel.h"
26#include "evlist.h"
27#include "machine.h"
28#include "session.h"
29#include "util.h"
30#include "thread.h"
31#include "thread-stack.h"
32#include "debug.h"
33#include "tsc.h"
34#include "auxtrace.h"
35#include "intel-pt-decoder/intel-pt-insn-decoder.h"
36#include "intel-bts.h"
37
38#define MAX_TIMESTAMP (~0ULL)
39
40#define INTEL_BTS_ERR_NOINSN 5
41#define INTEL_BTS_ERR_LOST 9
42
43#if __BYTE_ORDER == __BIG_ENDIAN
44#define le64_to_cpu bswap_64
45#else
46#define le64_to_cpu
47#endif
48
49struct intel_bts {
50 struct auxtrace auxtrace;
51 struct auxtrace_queues queues;
52 struct auxtrace_heap heap;
53 u32 auxtrace_type;
54 struct perf_session *session;
55 struct machine *machine;
56 bool sampling_mode;
57 bool snapshot_mode;
58 bool data_queued;
59 u32 pmu_type;
60 struct perf_tsc_conversion tc;
61 bool cap_user_time_zero;
62 struct itrace_synth_opts synth_opts;
63 bool sample_branches;
64 u32 branches_filter;
65 u64 branches_sample_type;
66 u64 branches_id;
67 size_t branches_event_size;
68 bool synth_needs_swap;
69};
70
71struct intel_bts_queue {
72 struct intel_bts *bts;
73 unsigned int queue_nr;
74 struct auxtrace_buffer *buffer;
75 bool on_heap;
76 bool done;
77 pid_t pid;
78 pid_t tid;
79 int cpu;
80 u64 time;
81 struct intel_pt_insn intel_pt_insn;
82 u32 sample_flags;
83};
84
85struct branch {
86 u64 from;
87 u64 to;
88 u64 misc;
89};
90
91static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
92 unsigned char *buf, size_t len)
93{
94 struct branch *branch;
95 size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
96 const char *color = PERF_COLOR_BLUE;
97
98 color_fprintf(stdout, color,
99 ". ... Intel BTS data: size %zu bytes\n",
100 len);
101
102 while (len) {
103 if (len >= br_sz)
104 sz = br_sz;
105 else
106 sz = len;
107 printf(".");
108 color_fprintf(stdout, color, " %08x: ", pos);
109 for (i = 0; i < sz; i++)
110 color_fprintf(stdout, color, " %02x", buf[i]);
111 for (; i < br_sz; i++)
112 color_fprintf(stdout, color, " ");
113 if (len >= br_sz) {
114 branch = (struct branch *)buf;
115 color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
116 le64_to_cpu(branch->from),
117 le64_to_cpu(branch->to),
118 le64_to_cpu(branch->misc) & 0x10 ?
119 "pred" : "miss");
120 } else {
121 color_fprintf(stdout, color, " Bad record!\n");
122 }
123 pos += sz;
124 buf += sz;
125 len -= sz;
126 }
127}
128
129static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
130 size_t len)
131{
132 printf(".\n");
133 intel_bts_dump(bts, buf, len);
134}
135
136static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
137{
138 union perf_event event;
139 int err;
140
141 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
142 INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
143 sample->tid, 0, "Lost trace data");
144
145 err = perf_session__deliver_synth_event(bts->session, &event, NULL);
146 if (err)
147 pr_err("Intel BTS: failed to deliver error event, error %d\n",
148 err);
149
150 return err;
151}
152
153static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
154 unsigned int queue_nr)
155{
156 struct intel_bts_queue *btsq;
157
158 btsq = zalloc(sizeof(struct intel_bts_queue));
159 if (!btsq)
160 return NULL;
161
162 btsq->bts = bts;
163 btsq->queue_nr = queue_nr;
164 btsq->pid = -1;
165 btsq->tid = -1;
166 btsq->cpu = -1;
167
168 return btsq;
169}
170
171static int intel_bts_setup_queue(struct intel_bts *bts,
172 struct auxtrace_queue *queue,
173 unsigned int queue_nr)
174{
175 struct intel_bts_queue *btsq = queue->priv;
176
177 if (list_empty(&queue->head))
178 return 0;
179
180 if (!btsq) {
181 btsq = intel_bts_alloc_queue(bts, queue_nr);
182 if (!btsq)
183 return -ENOMEM;
184 queue->priv = btsq;
185
186 if (queue->cpu != -1)
187 btsq->cpu = queue->cpu;
188 btsq->tid = queue->tid;
189 }
190
191 if (bts->sampling_mode)
192 return 0;
193
194 if (!btsq->on_heap && !btsq->buffer) {
195 int ret;
196
197 btsq->buffer = auxtrace_buffer__next(queue, NULL);
198 if (!btsq->buffer)
199 return 0;
200
201 ret = auxtrace_heap__add(&bts->heap, queue_nr,
202 btsq->buffer->reference);
203 if (ret)
204 return ret;
205 btsq->on_heap = true;
206 }
207
208 return 0;
209}
210
211static int intel_bts_setup_queues(struct intel_bts *bts)
212{
213 unsigned int i;
214 int ret;
215
216 for (i = 0; i < bts->queues.nr_queues; i++) {
217 ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
218 i);
219 if (ret)
220 return ret;
221 }
222 return 0;
223}
224
225static inline int intel_bts_update_queues(struct intel_bts *bts)
226{
227 if (bts->queues.new_data) {
228 bts->queues.new_data = false;
229 return intel_bts_setup_queues(bts);
230 }
231 return 0;
232}
233
234static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
235 unsigned char *buf_b, size_t len_b)
236{
237 size_t offs, len;
238
239 if (len_a > len_b)
240 offs = len_a - len_b;
241 else
242 offs = 0;
243
244 for (; offs < len_a; offs += sizeof(struct branch)) {
245 len = len_a - offs;
246 if (!memcmp(buf_a + offs, buf_b, len))
247 return buf_b + len;
248 }
249
250 return buf_b;
251}
252
253static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
254 struct auxtrace_buffer *b)
255{
256 struct auxtrace_buffer *a;
257 void *start;
258
259 if (b->list.prev == &queue->head)
260 return 0;
261 a = list_entry(b->list.prev, struct auxtrace_buffer, list);
262 start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
263 if (!start)
264 return -EINVAL;
265 b->use_size = b->data + b->size - start;
266 b->use_data = start;
267 return 0;
268}
269
270static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
271 struct branch *branch)
272{
273 int ret;
274 struct intel_bts *bts = btsq->bts;
275 union perf_event event;
276 struct perf_sample sample = { .ip = 0, };
277
278 event.sample.header.type = PERF_RECORD_SAMPLE;
279 event.sample.header.misc = PERF_RECORD_MISC_USER;
280 event.sample.header.size = sizeof(struct perf_event_header);
281
282 sample.ip = le64_to_cpu(branch->from);
283 sample.pid = btsq->pid;
284 sample.tid = btsq->tid;
285 sample.addr = le64_to_cpu(branch->to);
286 sample.id = btsq->bts->branches_id;
287 sample.stream_id = btsq->bts->branches_id;
288 sample.period = 1;
289 sample.cpu = btsq->cpu;
290 sample.flags = btsq->sample_flags;
291 sample.insn_len = btsq->intel_pt_insn.length;
292
293 if (bts->synth_opts.inject) {
294 event.sample.header.size = bts->branches_event_size;
295 ret = perf_event__synthesize_sample(&event,
296 bts->branches_sample_type,
297 0, &sample,
298 bts->synth_needs_swap);
299 if (ret)
300 return ret;
301 }
302
303 ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
304 if (ret)
305 pr_err("Intel BTS: failed to deliver branch event, error %d\n",
306 ret);
307
308 return ret;
309}
310
311static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
312{
313 struct machine *machine = btsq->bts->machine;
314 struct thread *thread;
315 struct addr_location al;
316 unsigned char buf[1024];
317 size_t bufsz;
318 ssize_t len;
319 int x86_64;
320 uint8_t cpumode;
321 int err = -1;
322
323 bufsz = intel_pt_insn_max_size();
324
325 if (machine__kernel_ip(machine, ip))
326 cpumode = PERF_RECORD_MISC_KERNEL;
327 else
328 cpumode = PERF_RECORD_MISC_USER;
329
330 thread = machine__find_thread(machine, -1, btsq->tid);
331 if (!thread)
332 return -1;
333
334 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
335 if (!al.map || !al.map->dso)
336 goto out_put;
337
338 len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz);
339 if (len <= 0)
340 goto out_put;
341
342 /* Load maps to ensure dso->is_64_bit has been updated */
343 map__load(al.map, machine->symbol_filter);
344
345 x86_64 = al.map->dso->is_64_bit;
346
347 if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
348 goto out_put;
349
350 err = 0;
351out_put:
352 thread__put(thread);
353 return err;
354}
355
356static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
357 pid_t tid, u64 ip)
358{
359 union perf_event event;
360 int err;
361
362 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
363 INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
364 "Failed to get instruction");
365
366 err = perf_session__deliver_synth_event(bts->session, &event, NULL);
367 if (err)
368 pr_err("Intel BTS: failed to deliver error event, error %d\n",
369 err);
370
371 return err;
372}
373
374static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
375 struct branch *branch)
376{
377 int err;
378
379 if (!branch->from) {
380 if (branch->to)
381 btsq->sample_flags = PERF_IP_FLAG_BRANCH |
382 PERF_IP_FLAG_TRACE_BEGIN;
383 else
384 btsq->sample_flags = 0;
385 btsq->intel_pt_insn.length = 0;
386 } else if (!branch->to) {
387 btsq->sample_flags = PERF_IP_FLAG_BRANCH |
388 PERF_IP_FLAG_TRACE_END;
389 btsq->intel_pt_insn.length = 0;
390 } else {
391 err = intel_bts_get_next_insn(btsq, branch->from);
392 if (err) {
393 btsq->sample_flags = 0;
394 btsq->intel_pt_insn.length = 0;
395 if (!btsq->bts->synth_opts.errors)
396 return 0;
397 err = intel_bts_synth_error(btsq->bts, btsq->cpu,
398 btsq->pid, btsq->tid,
399 branch->from);
400 return err;
401 }
402 btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
403 /* Check for an async branch into the kernel */
404 if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
405 machine__kernel_ip(btsq->bts->machine, branch->to) &&
406 btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
407 PERF_IP_FLAG_CALL |
408 PERF_IP_FLAG_SYSCALLRET))
409 btsq->sample_flags = PERF_IP_FLAG_BRANCH |
410 PERF_IP_FLAG_CALL |
411 PERF_IP_FLAG_ASYNC |
412 PERF_IP_FLAG_INTERRUPT;
413 }
414
415 return 0;
416}
417
418static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
419 struct auxtrace_buffer *buffer)
420{
421 struct branch *branch;
422 size_t sz, bsz = sizeof(struct branch);
423 u32 filter = btsq->bts->branches_filter;
424 int err = 0;
425
426 if (buffer->use_data) {
427 sz = buffer->use_size;
428 branch = buffer->use_data;
429 } else {
430 sz = buffer->size;
431 branch = buffer->data;
432 }
433
434 if (!btsq->bts->sample_branches)
435 return 0;
436
437 for (; sz > bsz; branch += 1, sz -= bsz) {
438 if (!branch->from && !branch->to)
439 continue;
440 intel_bts_get_branch_type(btsq, branch);
441 if (filter && !(filter & btsq->sample_flags))
442 continue;
443 err = intel_bts_synth_branch_sample(btsq, branch);
444 if (err)
445 break;
446 }
447 return err;
448}
449
450static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
451{
452 struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
453 struct auxtrace_queue *queue;
454 struct thread *thread;
455 int err;
456
457 if (btsq->done)
458 return 1;
459
460 if (btsq->pid == -1) {
461 thread = machine__find_thread(btsq->bts->machine, -1,
462 btsq->tid);
463 if (thread)
464 btsq->pid = thread->pid_;
465 } else {
466 thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
467 btsq->tid);
468 }
469
470 queue = &btsq->bts->queues.queue_array[btsq->queue_nr];
471
472 if (!buffer)
473 buffer = auxtrace_buffer__next(queue, NULL);
474
475 if (!buffer) {
476 if (!btsq->bts->sampling_mode)
477 btsq->done = 1;
478 err = 1;
479 goto out_put;
480 }
481
482 /* Currently there is no support for split buffers */
483 if (buffer->consecutive) {
484 err = -EINVAL;
485 goto out_put;
486 }
487
488 if (!buffer->data) {
489 int fd = perf_data_file__fd(btsq->bts->session->file);
490
491 buffer->data = auxtrace_buffer__get_data(buffer, fd);
492 if (!buffer->data) {
493 err = -ENOMEM;
494 goto out_put;
495 }
496 }
497
498 if (btsq->bts->snapshot_mode && !buffer->consecutive &&
499 intel_bts_do_fix_overlap(queue, buffer)) {
500 err = -ENOMEM;
501 goto out_put;
502 }
503
504 if (!btsq->bts->synth_opts.callchain && thread &&
505 (!old_buffer || btsq->bts->sampling_mode ||
506 (btsq->bts->snapshot_mode && !buffer->consecutive)))
507 thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
508
509 err = intel_bts_process_buffer(btsq, buffer);
510
511 auxtrace_buffer__drop_data(buffer);
512
513 btsq->buffer = auxtrace_buffer__next(queue, buffer);
514 if (btsq->buffer) {
515 if (timestamp)
516 *timestamp = btsq->buffer->reference;
517 } else {
518 if (!btsq->bts->sampling_mode)
519 btsq->done = 1;
520 }
521out_put:
522 thread__put(thread);
523 return err;
524}
525
526static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
527{
528 u64 ts = 0;
529 int ret;
530
531 while (1) {
532 ret = intel_bts_process_queue(btsq, &ts);
533 if (ret < 0)
534 return ret;
535 if (ret)
536 break;
537 }
538 return 0;
539}
540
541static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
542{
543 struct auxtrace_queues *queues = &bts->queues;
544 unsigned int i;
545
546 for (i = 0; i < queues->nr_queues; i++) {
547 struct auxtrace_queue *queue = &bts->queues.queue_array[i];
548 struct intel_bts_queue *btsq = queue->priv;
549
550 if (btsq && btsq->tid == tid)
551 return intel_bts_flush_queue(btsq);
552 }
553 return 0;
554}
555
556static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
557{
558 while (1) {
559 unsigned int queue_nr;
560 struct auxtrace_queue *queue;
561 struct intel_bts_queue *btsq;
562 u64 ts = 0;
563 int ret;
564
565 if (!bts->heap.heap_cnt)
566 return 0;
567
568 if (bts->heap.heap_array[0].ordinal > timestamp)
569 return 0;
570
571 queue_nr = bts->heap.heap_array[0].queue_nr;
572 queue = &bts->queues.queue_array[queue_nr];
573 btsq = queue->priv;
574
575 auxtrace_heap__pop(&bts->heap);
576
577 ret = intel_bts_process_queue(btsq, &ts);
578 if (ret < 0) {
579 auxtrace_heap__add(&bts->heap, queue_nr, ts);
580 return ret;
581 }
582
583 if (!ret) {
584 ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
585 if (ret < 0)
586 return ret;
587 } else {
588 btsq->on_heap = false;
589 }
590 }
591
592 return 0;
593}
594
595static int intel_bts_process_event(struct perf_session *session,
596 union perf_event *event,
597 struct perf_sample *sample,
598 struct perf_tool *tool)
599{
600 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
601 auxtrace);
602 u64 timestamp;
603 int err;
604
605 if (dump_trace)
606 return 0;
607
608 if (!tool->ordered_events) {
609 pr_err("Intel BTS requires ordered events\n");
610 return -EINVAL;
611 }
612
613 if (sample->time && sample->time != (u64)-1)
614 timestamp = perf_time_to_tsc(sample->time, &bts->tc);
615 else
616 timestamp = 0;
617
618 err = intel_bts_update_queues(bts);
619 if (err)
620 return err;
621
622 err = intel_bts_process_queues(bts, timestamp);
623 if (err)
624 return err;
625 if (event->header.type == PERF_RECORD_EXIT) {
626 err = intel_bts_process_tid_exit(bts, event->comm.tid);
627 if (err)
628 return err;
629 }
630
631 if (event->header.type == PERF_RECORD_AUX &&
632 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
633 bts->synth_opts.errors)
634 err = intel_bts_lost(bts, sample);
635
636 return err;
637}
638
639static int intel_bts_process_auxtrace_event(struct perf_session *session,
640 union perf_event *event,
641 struct perf_tool *tool __maybe_unused)
642{
643 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
644 auxtrace);
645
646 if (bts->sampling_mode)
647 return 0;
648
649 if (!bts->data_queued) {
650 struct auxtrace_buffer *buffer;
651 off_t data_offset;
652 int fd = perf_data_file__fd(session->file);
653 int err;
654
655 if (perf_data_file__is_pipe(session->file)) {
656 data_offset = 0;
657 } else {
658 data_offset = lseek(fd, 0, SEEK_CUR);
659 if (data_offset == -1)
660 return -errno;
661 }
662
663 err = auxtrace_queues__add_event(&bts->queues, session, event,
664 data_offset, &buffer);
665 if (err)
666 return err;
667
668 /* Dump here now we have copied a piped trace out of the pipe */
669 if (dump_trace) {
670 if (auxtrace_buffer__get_data(buffer, fd)) {
671 intel_bts_dump_event(bts, buffer->data,
672 buffer->size);
673 auxtrace_buffer__put_data(buffer);
674 }
675 }
676 }
677
678 return 0;
679}
680
681static int intel_bts_flush(struct perf_session *session __maybe_unused,
682 struct perf_tool *tool __maybe_unused)
683{
684 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
685 auxtrace);
686 int ret;
687
688 if (dump_trace || bts->sampling_mode)
689 return 0;
690
691 if (!tool->ordered_events)
692 return -EINVAL;
693
694 ret = intel_bts_update_queues(bts);
695 if (ret < 0)
696 return ret;
697
698 return intel_bts_process_queues(bts, MAX_TIMESTAMP);
699}
700
701static void intel_bts_free_queue(void *priv)
702{
703 struct intel_bts_queue *btsq = priv;
704
705 if (!btsq)
706 return;
707 free(btsq);
708}
709
710static void intel_bts_free_events(struct perf_session *session)
711{
712 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
713 auxtrace);
714 struct auxtrace_queues *queues = &bts->queues;
715 unsigned int i;
716
717 for (i = 0; i < queues->nr_queues; i++) {
718 intel_bts_free_queue(queues->queue_array[i].priv);
719 queues->queue_array[i].priv = NULL;
720 }
721 auxtrace_queues__free(queues);
722}
723
724static void intel_bts_free(struct perf_session *session)
725{
726 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
727 auxtrace);
728
729 auxtrace_heap__free(&bts->heap);
730 intel_bts_free_events(session);
731 session->auxtrace = NULL;
732 free(bts);
733}
734
735struct intel_bts_synth {
736 struct perf_tool dummy_tool;
737 struct perf_session *session;
738};
739
740static int intel_bts_event_synth(struct perf_tool *tool,
741 union perf_event *event,
742 struct perf_sample *sample __maybe_unused,
743 struct machine *machine __maybe_unused)
744{
745 struct intel_bts_synth *intel_bts_synth =
746 container_of(tool, struct intel_bts_synth, dummy_tool);
747
748 return perf_session__deliver_synth_event(intel_bts_synth->session,
749 event, NULL);
750}
751
752static int intel_bts_synth_event(struct perf_session *session,
753 struct perf_event_attr *attr, u64 id)
754{
755 struct intel_bts_synth intel_bts_synth;
756
757 memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
758 intel_bts_synth.session = session;
759
760 return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
761 &id, intel_bts_event_synth);
762}
763
764static int intel_bts_synth_events(struct intel_bts *bts,
765 struct perf_session *session)
766{
767 struct perf_evlist *evlist = session->evlist;
768 struct perf_evsel *evsel;
769 struct perf_event_attr attr;
770 bool found = false;
771 u64 id;
772 int err;
773
774 evlist__for_each(evlist, evsel) {
775 if (evsel->attr.type == bts->pmu_type && evsel->ids) {
776 found = true;
777 break;
778 }
779 }
780
781 if (!found) {
782 pr_debug("There are no selected events with Intel BTS data\n");
783 return 0;
784 }
785
786 memset(&attr, 0, sizeof(struct perf_event_attr));
787 attr.size = sizeof(struct perf_event_attr);
788 attr.type = PERF_TYPE_HARDWARE;
789 attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
790 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
791 PERF_SAMPLE_PERIOD;
792 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
793 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
794 attr.exclude_user = evsel->attr.exclude_user;
795 attr.exclude_kernel = evsel->attr.exclude_kernel;
796 attr.exclude_hv = evsel->attr.exclude_hv;
797 attr.exclude_host = evsel->attr.exclude_host;
798 attr.exclude_guest = evsel->attr.exclude_guest;
799 attr.sample_id_all = evsel->attr.sample_id_all;
800 attr.read_format = evsel->attr.read_format;
801
802 id = evsel->id[0] + 1000000000;
803 if (!id)
804 id = 1;
805
806 if (bts->synth_opts.branches) {
807 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
808 attr.sample_period = 1;
809 attr.sample_type |= PERF_SAMPLE_ADDR;
810 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
811 id, (u64)attr.sample_type);
812 err = intel_bts_synth_event(session, &attr, id);
813 if (err) {
814 pr_err("%s: failed to synthesize 'branches' event type\n",
815 __func__);
816 return err;
817 }
818 bts->sample_branches = true;
819 bts->branches_sample_type = attr.sample_type;
820 bts->branches_id = id;
821 /*
822 * We only use sample types from PERF_SAMPLE_MASK so we can use
823 * __perf_evsel__sample_size() here.
824 */
825 bts->branches_event_size = sizeof(struct sample_event) +
826 __perf_evsel__sample_size(attr.sample_type);
827 }
828
829 bts->synth_needs_swap = evsel->needs_swap;
830
831 return 0;
832}
833
834static const char * const intel_bts_info_fmts[] = {
835 [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n",
836 [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
837 [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
838 [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n",
839 [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
840 [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
841};
842
843static void intel_bts_print_info(u64 *arr, int start, int finish)
844{
845 int i;
846
847 if (!dump_trace)
848 return;
849
850 for (i = start; i <= finish; i++)
851 fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
852}
853
854u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];
855
856int intel_bts_process_auxtrace_info(union perf_event *event,
857 struct perf_session *session)
858{
859 struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
860 size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
861 struct intel_bts *bts;
862 int err;
863
864 if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
865 min_sz)
866 return -EINVAL;
867
868 bts = zalloc(sizeof(struct intel_bts));
869 if (!bts)
870 return -ENOMEM;
871
872 err = auxtrace_queues__init(&bts->queues);
873 if (err)
874 goto err_free;
875
876 bts->session = session;
877 bts->machine = &session->machines.host; /* No kvm support */
878 bts->auxtrace_type = auxtrace_info->type;
879 bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
880 bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
881 bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
882 bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
883 bts->cap_user_time_zero =
884 auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
885 bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];
886
887 bts->sampling_mode = false;
888
889 bts->auxtrace.process_event = intel_bts_process_event;
890 bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
891 bts->auxtrace.flush_events = intel_bts_flush;
892 bts->auxtrace.free_events = intel_bts_free_events;
893 bts->auxtrace.free = intel_bts_free;
894 session->auxtrace = &bts->auxtrace;
895
896 intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
897 INTEL_BTS_SNAPSHOT_MODE);
898
899 if (dump_trace)
900 return 0;
901
902 if (session->itrace_synth_opts && session->itrace_synth_opts->set)
903 bts->synth_opts = *session->itrace_synth_opts;
904 else
905 itrace_synth_opts__set_default(&bts->synth_opts);
906
907 if (bts->synth_opts.calls)
908 bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
909 PERF_IP_FLAG_TRACE_END;
910 if (bts->synth_opts.returns)
911 bts->branches_filter |= PERF_IP_FLAG_RETURN |
912 PERF_IP_FLAG_TRACE_BEGIN;
913
914 err = intel_bts_synth_events(bts, session);
915 if (err)
916 goto err_free_queues;
917
918 err = auxtrace_queues__process_index(&bts->queues, session);
919 if (err)
920 goto err_free_queues;
921
922 if (bts->queues.populated)
923 bts->data_queued = true;
924
925 return 0;
926
927err_free_queues:
928 auxtrace_queues__free(&bts->queues);
929 session->auxtrace = NULL;
930err_free:
931 free(bts);
932 return err;
933}
diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h
new file mode 100644
index 000000000000..ca65e21b3e83
--- /dev/null
+++ b/tools/perf/util/intel-bts.h
@@ -0,0 +1,43 @@
1/*
2 * intel-bts.h: Intel Processor Trace support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef INCLUDE__PERF_INTEL_BTS_H__
17#define INCLUDE__PERF_INTEL_BTS_H__
18
19#define INTEL_BTS_PMU_NAME "intel_bts"
20
21enum {
22 INTEL_BTS_PMU_TYPE,
23 INTEL_BTS_TIME_SHIFT,
24 INTEL_BTS_TIME_MULT,
25 INTEL_BTS_TIME_ZERO,
26 INTEL_BTS_CAP_USER_TIME_ZERO,
27 INTEL_BTS_SNAPSHOT_MODE,
28 INTEL_BTS_AUXTRACE_PRIV_MAX,
29};
30
31#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64))
32
33struct auxtrace_record;
34struct perf_tool;
35union perf_event;
36struct perf_session;
37
38struct auxtrace_record *intel_bts_recording_init(int *err);
39
40int intel_bts_process_auxtrace_info(union perf_event *event,
41 struct perf_session *session);
42
43#endif
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 3c71138e7672..89c91a1a67e7 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -462,10 +462,6 @@ static struct perf_pmu *pmu_lookup(const char *name)
462 LIST_HEAD(aliases); 462 LIST_HEAD(aliases);
463 __u32 type; 463 __u32 type;
464 464
465 /* No support for intel_bts so disallow it */
466 if (!strcmp(name, "intel_bts"))
467 return NULL;
468
469 /* 465 /*
470 * The pmu data we store & need consists of the pmu 466 * The pmu data we store & need consists of the pmu
471 * type value and format definitions. Load both right 467 * type value and format definitions. Load both right