aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util/intel-bts.c
diff options
context:
space:
mode:
authorAdrian Hunter <adrian.hunter@intel.com>2015-07-17 12:33:43 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2015-08-21 10:34:10 -0400
commitd0170af7004dce9cd90b749842c37e379476cbc8 (patch)
treee30cf4c15b6b565cfe969b1613e297343b5b2932 /tools/perf/util/intel-bts.c
parent6f56e9cf581c6cedcaea3eb69444b169867ccf3d (diff)
perf tools: Add Intel BTS support
Intel BTS support fits within the new auxtrace infrastructure. Recording is supported by identifying the Intel BTS PMU, parsing options and setting up events. Decoding is supported by queuing up trace data by thread and then decoding synchronously, delivering synthesized event samples into the session processing for tools to consume. Committer note: E.g: [root@felicio ~]# perf record --per-thread -e intel_bts// ls anaconda-ks.cfg apctest.output bin kernel-rt-3.10.0-298.rt56.171.el7.x86_64.rpm libexec lock_page.bpf.c perf.data perf.data.old [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 4.367 MB perf.data ] [root@felicio ~]# perf evlist -v intel_bts//: type: 6, size: 112, { sample_period, sample_freq }: 1, sample_type: IP|TID|IDENTIFIER, read_format: ID, disabled: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1 dummy:u: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1, sample_type: IP|TID|IDENTIFIER, read_format: ID, disabled: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1 [root@felicio ~]# perf script # then navigate in the pager to some interesting place: ls 1843 1 branches: ffffffff810a60cb flush_signal_handlers ([kernel.kallsyms]) => ffffffff8121a522 setup_new_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8121a529 setup_new_exec ([kernel.kallsyms]) => ffffffff8122fa30 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa5d do_close_on_exec ([kernel.kallsyms]) => ffffffff81767ae0 _raw_spin_lock ([kernel.kallsyms]) ls 1843 1 branches: ffffffff81767af4 _raw_spin_lock ([kernel.kallsyms]) => ffffffff8122fa62 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 
1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fa8e do_close_on_exec ([kernel.kallsyms]) => ffffffff8122faf0 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122faf7 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fa8b do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fac9 do_close_on_exec ([kernel.kallsyms]) => ffffffff8122fad2 do_close_on_exec ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8122fadd do_close_on_exec ([kernel.kallsyms]) => ffffffff8120fc80 filp_close ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8120fcaf filp_close ([kernel.kallsyms]) => ffffffff8120fcb6 filp_close ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8120fcc2 filp_close ([kernel.kallsyms]) => ffffffff812547f0 dnotify_flush ([kernel.kallsyms]) ls 1843 1 branches: ffffffff81254823 dnotify_flush ([kernel.kallsyms]) => ffffffff8120fcc7 filp_close ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8120fccd filp_close ([kernel.kallsyms]) => 
ffffffff81261790 locks_remove_posix ([kernel.kallsyms]) ls 1843 1 branches: ffffffff812617a3 locks_remove_posix ([kernel.kallsyms]) => ffffffff812617b9 locks_remove_posix ([kernel.kallsyms]) ls 1843 1 branches: ffffffff812617b9 locks_remove_posix ([kernel.kallsyms]) => ffffffff8120fcd2 filp_close ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8120fcd5 filp_close ([kernel.kallsyms]) => ffffffff812142c0 fput ([kernel.kallsyms]) ls 1843 1 branches: ffffffff812142d6 fput ([kernel.kallsyms]) => ffffffff812142df fput ([kernel.kallsyms]) ls 1843 1 branches: ffffffff8121430c fput ([kernel.kallsyms]) => ffffffff810b6580 task_work_add ([kernel.kallsyms]) ls 1843 1 branches: ffffffff810b65ad task_work_add ([kernel.kallsyms]) => ffffffff810b65b1 task_work_add ([kernel.kallsyms]) ls 1843 1 branches: ffffffff810b65c1 task_work_add ([kernel.kallsyms]) => ffffffff810bc710 kick_process ([kernel.kallsyms]) ls 1843 1 branches: ffffffff810bc725 kick_process ([kernel.kallsyms]) => ffffffff810bc742 kick_process ([kernel.kallsyms]) ls 1843 1 branches: ffffffff810bc742 kick_process ([kernel.kallsyms]) => ffffffff810b65c6 task_work_add ([kernel.kallsyms]) ls 1843 1 branches: ffffffff810b65c9 task_work_add ([kernel.kallsyms]) => ffffffff81214311 fput ([kernel.kallsyms]) Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/1437150840-31811-9-git-send-email-adrian.hunter@intel.com [ Merged sample->time fix for bug found after first round of testing on slightly older kernel ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/intel-bts.c')
-rw-r--r--tools/perf/util/intel-bts.c933
1 files changed, 933 insertions, 0 deletions
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
new file mode 100644
index 000000000000..ea768625ab5b
--- /dev/null
+++ b/tools/perf/util/intel-bts.c
@@ -0,0 +1,933 @@
1/*
2 * intel-bts.c: Intel BTS support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <endian.h>
17#include <byteswap.h>
18#include <linux/kernel.h>
19#include <linux/types.h>
20#include <linux/bitops.h>
21#include <linux/log2.h>
22
23#include "cpumap.h"
24#include "color.h"
25#include "evsel.h"
26#include "evlist.h"
27#include "machine.h"
28#include "session.h"
29#include "util.h"
30#include "thread.h"
31#include "thread-stack.h"
32#include "debug.h"
33#include "tsc.h"
34#include "auxtrace.h"
35#include "intel-pt-decoder/intel-pt-insn-decoder.h"
36#include "intel-bts.h"
37
38#define MAX_TIMESTAMP (~0ULL)
39
40#define INTEL_BTS_ERR_NOINSN 5
41#define INTEL_BTS_ERR_LOST 9
42
43#if __BYTE_ORDER == __BIG_ENDIAN
44#define le64_to_cpu bswap_64
45#else
46#define le64_to_cpu
47#endif
48
49struct intel_bts {
50 struct auxtrace auxtrace;
51 struct auxtrace_queues queues;
52 struct auxtrace_heap heap;
53 u32 auxtrace_type;
54 struct perf_session *session;
55 struct machine *machine;
56 bool sampling_mode;
57 bool snapshot_mode;
58 bool data_queued;
59 u32 pmu_type;
60 struct perf_tsc_conversion tc;
61 bool cap_user_time_zero;
62 struct itrace_synth_opts synth_opts;
63 bool sample_branches;
64 u32 branches_filter;
65 u64 branches_sample_type;
66 u64 branches_id;
67 size_t branches_event_size;
68 bool synth_needs_swap;
69};
70
71struct intel_bts_queue {
72 struct intel_bts *bts;
73 unsigned int queue_nr;
74 struct auxtrace_buffer *buffer;
75 bool on_heap;
76 bool done;
77 pid_t pid;
78 pid_t tid;
79 int cpu;
80 u64 time;
81 struct intel_pt_insn intel_pt_insn;
82 u32 sample_flags;
83};
84
85struct branch {
86 u64 from;
87 u64 to;
88 u64 misc;
89};
90
91static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
92 unsigned char *buf, size_t len)
93{
94 struct branch *branch;
95 size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
96 const char *color = PERF_COLOR_BLUE;
97
98 color_fprintf(stdout, color,
99 ". ... Intel BTS data: size %zu bytes\n",
100 len);
101
102 while (len) {
103 if (len >= br_sz)
104 sz = br_sz;
105 else
106 sz = len;
107 printf(".");
108 color_fprintf(stdout, color, " %08x: ", pos);
109 for (i = 0; i < sz; i++)
110 color_fprintf(stdout, color, " %02x", buf[i]);
111 for (; i < br_sz; i++)
112 color_fprintf(stdout, color, " ");
113 if (len >= br_sz) {
114 branch = (struct branch *)buf;
115 color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
116 le64_to_cpu(branch->from),
117 le64_to_cpu(branch->to),
118 le64_to_cpu(branch->misc) & 0x10 ?
119 "pred" : "miss");
120 } else {
121 color_fprintf(stdout, color, " Bad record!\n");
122 }
123 pos += sz;
124 buf += sz;
125 len -= sz;
126 }
127}
128
/* Print a "." separator line, then dump @len bytes of trace data at @buf. */
static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
				 size_t len)
{
	printf(".\n");

	intel_bts_dump(bts, buf, len);
}
135
136static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
137{
138 union perf_event event;
139 int err;
140
141 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
142 INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
143 sample->tid, 0, "Lost trace data");
144
145 err = perf_session__deliver_synth_event(bts->session, &event, NULL);
146 if (err)
147 pr_err("Intel BTS: failed to deliver error event, error %d\n",
148 err);
149
150 return err;
151}
152
153static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
154 unsigned int queue_nr)
155{
156 struct intel_bts_queue *btsq;
157
158 btsq = zalloc(sizeof(struct intel_bts_queue));
159 if (!btsq)
160 return NULL;
161
162 btsq->bts = bts;
163 btsq->queue_nr = queue_nr;
164 btsq->pid = -1;
165 btsq->tid = -1;
166 btsq->cpu = -1;
167
168 return btsq;
169}
170
171static int intel_bts_setup_queue(struct intel_bts *bts,
172 struct auxtrace_queue *queue,
173 unsigned int queue_nr)
174{
175 struct intel_bts_queue *btsq = queue->priv;
176
177 if (list_empty(&queue->head))
178 return 0;
179
180 if (!btsq) {
181 btsq = intel_bts_alloc_queue(bts, queue_nr);
182 if (!btsq)
183 return -ENOMEM;
184 queue->priv = btsq;
185
186 if (queue->cpu != -1)
187 btsq->cpu = queue->cpu;
188 btsq->tid = queue->tid;
189 }
190
191 if (bts->sampling_mode)
192 return 0;
193
194 if (!btsq->on_heap && !btsq->buffer) {
195 int ret;
196
197 btsq->buffer = auxtrace_buffer__next(queue, NULL);
198 if (!btsq->buffer)
199 return 0;
200
201 ret = auxtrace_heap__add(&bts->heap, queue_nr,
202 btsq->buffer->reference);
203 if (ret)
204 return ret;
205 btsq->on_heap = true;
206 }
207
208 return 0;
209}
210
211static int intel_bts_setup_queues(struct intel_bts *bts)
212{
213 unsigned int i;
214 int ret;
215
216 for (i = 0; i < bts->queues.nr_queues; i++) {
217 ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
218 i);
219 if (ret)
220 return ret;
221 }
222 return 0;
223}
224
225static inline int intel_bts_update_queues(struct intel_bts *bts)
226{
227 if (bts->queues.new_data) {
228 bts->queues.new_data = false;
229 return intel_bts_setup_queues(bts);
230 }
231 return 0;
232}
233
234static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
235 unsigned char *buf_b, size_t len_b)
236{
237 size_t offs, len;
238
239 if (len_a > len_b)
240 offs = len_a - len_b;
241 else
242 offs = 0;
243
244 for (; offs < len_a; offs += sizeof(struct branch)) {
245 len = len_a - offs;
246 if (!memcmp(buf_a + offs, buf_b, len))
247 return buf_b + len;
248 }
249
250 return buf_b;
251}
252
253static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
254 struct auxtrace_buffer *b)
255{
256 struct auxtrace_buffer *a;
257 void *start;
258
259 if (b->list.prev == &queue->head)
260 return 0;
261 a = list_entry(b->list.prev, struct auxtrace_buffer, list);
262 start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
263 if (!start)
264 return -EINVAL;
265 b->use_size = b->data + b->size - start;
266 b->use_data = start;
267 return 0;
268}
269
270static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
271 struct branch *branch)
272{
273 int ret;
274 struct intel_bts *bts = btsq->bts;
275 union perf_event event;
276 struct perf_sample sample = { .ip = 0, };
277
278 event.sample.header.type = PERF_RECORD_SAMPLE;
279 event.sample.header.misc = PERF_RECORD_MISC_USER;
280 event.sample.header.size = sizeof(struct perf_event_header);
281
282 sample.ip = le64_to_cpu(branch->from);
283 sample.pid = btsq->pid;
284 sample.tid = btsq->tid;
285 sample.addr = le64_to_cpu(branch->to);
286 sample.id = btsq->bts->branches_id;
287 sample.stream_id = btsq->bts->branches_id;
288 sample.period = 1;
289 sample.cpu = btsq->cpu;
290 sample.flags = btsq->sample_flags;
291 sample.insn_len = btsq->intel_pt_insn.length;
292
293 if (bts->synth_opts.inject) {
294 event.sample.header.size = bts->branches_event_size;
295 ret = perf_event__synthesize_sample(&event,
296 bts->branches_sample_type,
297 0, &sample,
298 bts->synth_needs_swap);
299 if (ret)
300 return ret;
301 }
302
303 ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
304 if (ret)
305 pr_err("Intel BTS: failed to deliver branch event, error %d\n",
306 ret);
307
308 return ret;
309}
310
311static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
312{
313 struct machine *machine = btsq->bts->machine;
314 struct thread *thread;
315 struct addr_location al;
316 unsigned char buf[1024];
317 size_t bufsz;
318 ssize_t len;
319 int x86_64;
320 uint8_t cpumode;
321 int err = -1;
322
323 bufsz = intel_pt_insn_max_size();
324
325 if (machine__kernel_ip(machine, ip))
326 cpumode = PERF_RECORD_MISC_KERNEL;
327 else
328 cpumode = PERF_RECORD_MISC_USER;
329
330 thread = machine__find_thread(machine, -1, btsq->tid);
331 if (!thread)
332 return -1;
333
334 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
335 if (!al.map || !al.map->dso)
336 goto out_put;
337
338 len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz);
339 if (len <= 0)
340 goto out_put;
341
342 /* Load maps to ensure dso->is_64_bit has been updated */
343 map__load(al.map, machine->symbol_filter);
344
345 x86_64 = al.map->dso->is_64_bit;
346
347 if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
348 goto out_put;
349
350 err = 0;
351out_put:
352 thread__put(thread);
353 return err;
354}
355
356static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
357 pid_t tid, u64 ip)
358{
359 union perf_event event;
360 int err;
361
362 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
363 INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
364 "Failed to get instruction");
365
366 err = perf_session__deliver_synth_event(bts->session, &event, NULL);
367 if (err)
368 pr_err("Intel BTS: failed to deliver error event, error %d\n",
369 err);
370
371 return err;
372}
373
374static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
375 struct branch *branch)
376{
377 int err;
378
379 if (!branch->from) {
380 if (branch->to)
381 btsq->sample_flags = PERF_IP_FLAG_BRANCH |
382 PERF_IP_FLAG_TRACE_BEGIN;
383 else
384 btsq->sample_flags = 0;
385 btsq->intel_pt_insn.length = 0;
386 } else if (!branch->to) {
387 btsq->sample_flags = PERF_IP_FLAG_BRANCH |
388 PERF_IP_FLAG_TRACE_END;
389 btsq->intel_pt_insn.length = 0;
390 } else {
391 err = intel_bts_get_next_insn(btsq, branch->from);
392 if (err) {
393 btsq->sample_flags = 0;
394 btsq->intel_pt_insn.length = 0;
395 if (!btsq->bts->synth_opts.errors)
396 return 0;
397 err = intel_bts_synth_error(btsq->bts, btsq->cpu,
398 btsq->pid, btsq->tid,
399 branch->from);
400 return err;
401 }
402 btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
403 /* Check for an async branch into the kernel */
404 if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
405 machine__kernel_ip(btsq->bts->machine, branch->to) &&
406 btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
407 PERF_IP_FLAG_CALL |
408 PERF_IP_FLAG_SYSCALLRET))
409 btsq->sample_flags = PERF_IP_FLAG_BRANCH |
410 PERF_IP_FLAG_CALL |
411 PERF_IP_FLAG_ASYNC |
412 PERF_IP_FLAG_INTERRUPT;
413 }
414
415 return 0;
416}
417
418static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
419 struct auxtrace_buffer *buffer)
420{
421 struct branch *branch;
422 size_t sz, bsz = sizeof(struct branch);
423 u32 filter = btsq->bts->branches_filter;
424 int err = 0;
425
426 if (buffer->use_data) {
427 sz = buffer->use_size;
428 branch = buffer->use_data;
429 } else {
430 sz = buffer->size;
431 branch = buffer->data;
432 }
433
434 if (!btsq->bts->sample_branches)
435 return 0;
436
437 for (; sz > bsz; branch += 1, sz -= bsz) {
438 if (!branch->from && !branch->to)
439 continue;
440 intel_bts_get_branch_type(btsq, branch);
441 if (filter && !(filter & btsq->sample_flags))
442 continue;
443 err = intel_bts_synth_branch_sample(btsq, branch);
444 if (err)
445 break;
446 }
447 return err;
448}
449
450static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
451{
452 struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
453 struct auxtrace_queue *queue;
454 struct thread *thread;
455 int err;
456
457 if (btsq->done)
458 return 1;
459
460 if (btsq->pid == -1) {
461 thread = machine__find_thread(btsq->bts->machine, -1,
462 btsq->tid);
463 if (thread)
464 btsq->pid = thread->pid_;
465 } else {
466 thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
467 btsq->tid);
468 }
469
470 queue = &btsq->bts->queues.queue_array[btsq->queue_nr];
471
472 if (!buffer)
473 buffer = auxtrace_buffer__next(queue, NULL);
474
475 if (!buffer) {
476 if (!btsq->bts->sampling_mode)
477 btsq->done = 1;
478 err = 1;
479 goto out_put;
480 }
481
482 /* Currently there is no support for split buffers */
483 if (buffer->consecutive) {
484 err = -EINVAL;
485 goto out_put;
486 }
487
488 if (!buffer->data) {
489 int fd = perf_data_file__fd(btsq->bts->session->file);
490
491 buffer->data = auxtrace_buffer__get_data(buffer, fd);
492 if (!buffer->data) {
493 err = -ENOMEM;
494 goto out_put;
495 }
496 }
497
498 if (btsq->bts->snapshot_mode && !buffer->consecutive &&
499 intel_bts_do_fix_overlap(queue, buffer)) {
500 err = -ENOMEM;
501 goto out_put;
502 }
503
504 if (!btsq->bts->synth_opts.callchain && thread &&
505 (!old_buffer || btsq->bts->sampling_mode ||
506 (btsq->bts->snapshot_mode && !buffer->consecutive)))
507 thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
508
509 err = intel_bts_process_buffer(btsq, buffer);
510
511 auxtrace_buffer__drop_data(buffer);
512
513 btsq->buffer = auxtrace_buffer__next(queue, buffer);
514 if (btsq->buffer) {
515 if (timestamp)
516 *timestamp = btsq->buffer->reference;
517 } else {
518 if (!btsq->bts->sampling_mode)
519 btsq->done = 1;
520 }
521out_put:
522 thread__put(thread);
523 return err;
524}
525
526static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
527{
528 u64 ts = 0;
529 int ret;
530
531 while (1) {
532 ret = intel_bts_process_queue(btsq, &ts);
533 if (ret < 0)
534 return ret;
535 if (ret)
536 break;
537 }
538 return 0;
539}
540
541static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
542{
543 struct auxtrace_queues *queues = &bts->queues;
544 unsigned int i;
545
546 for (i = 0; i < queues->nr_queues; i++) {
547 struct auxtrace_queue *queue = &bts->queues.queue_array[i];
548 struct intel_bts_queue *btsq = queue->priv;
549
550 if (btsq && btsq->tid == tid)
551 return intel_bts_flush_queue(btsq);
552 }
553 return 0;
554}
555
556static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
557{
558 while (1) {
559 unsigned int queue_nr;
560 struct auxtrace_queue *queue;
561 struct intel_bts_queue *btsq;
562 u64 ts = 0;
563 int ret;
564
565 if (!bts->heap.heap_cnt)
566 return 0;
567
568 if (bts->heap.heap_array[0].ordinal > timestamp)
569 return 0;
570
571 queue_nr = bts->heap.heap_array[0].queue_nr;
572 queue = &bts->queues.queue_array[queue_nr];
573 btsq = queue->priv;
574
575 auxtrace_heap__pop(&bts->heap);
576
577 ret = intel_bts_process_queue(btsq, &ts);
578 if (ret < 0) {
579 auxtrace_heap__add(&bts->heap, queue_nr, ts);
580 return ret;
581 }
582
583 if (!ret) {
584 ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
585 if (ret < 0)
586 return ret;
587 } else {
588 btsq->on_heap = false;
589 }
590 }
591
592 return 0;
593}
594
595static int intel_bts_process_event(struct perf_session *session,
596 union perf_event *event,
597 struct perf_sample *sample,
598 struct perf_tool *tool)
599{
600 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
601 auxtrace);
602 u64 timestamp;
603 int err;
604
605 if (dump_trace)
606 return 0;
607
608 if (!tool->ordered_events) {
609 pr_err("Intel BTS requires ordered events\n");
610 return -EINVAL;
611 }
612
613 if (sample->time && sample->time != (u64)-1)
614 timestamp = perf_time_to_tsc(sample->time, &bts->tc);
615 else
616 timestamp = 0;
617
618 err = intel_bts_update_queues(bts);
619 if (err)
620 return err;
621
622 err = intel_bts_process_queues(bts, timestamp);
623 if (err)
624 return err;
625 if (event->header.type == PERF_RECORD_EXIT) {
626 err = intel_bts_process_tid_exit(bts, event->comm.tid);
627 if (err)
628 return err;
629 }
630
631 if (event->header.type == PERF_RECORD_AUX &&
632 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
633 bts->synth_opts.errors)
634 err = intel_bts_lost(bts, sample);
635
636 return err;
637}
638
639static int intel_bts_process_auxtrace_event(struct perf_session *session,
640 union perf_event *event,
641 struct perf_tool *tool __maybe_unused)
642{
643 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
644 auxtrace);
645
646 if (bts->sampling_mode)
647 return 0;
648
649 if (!bts->data_queued) {
650 struct auxtrace_buffer *buffer;
651 off_t data_offset;
652 int fd = perf_data_file__fd(session->file);
653 int err;
654
655 if (perf_data_file__is_pipe(session->file)) {
656 data_offset = 0;
657 } else {
658 data_offset = lseek(fd, 0, SEEK_CUR);
659 if (data_offset == -1)
660 return -errno;
661 }
662
663 err = auxtrace_queues__add_event(&bts->queues, session, event,
664 data_offset, &buffer);
665 if (err)
666 return err;
667
668 /* Dump here now we have copied a piped trace out of the pipe */
669 if (dump_trace) {
670 if (auxtrace_buffer__get_data(buffer, fd)) {
671 intel_bts_dump_event(bts, buffer->data,
672 buffer->size);
673 auxtrace_buffer__put_data(buffer);
674 }
675 }
676 }
677
678 return 0;
679}
680
681static int intel_bts_flush(struct perf_session *session __maybe_unused,
682 struct perf_tool *tool __maybe_unused)
683{
684 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
685 auxtrace);
686 int ret;
687
688 if (dump_trace || bts->sampling_mode)
689 return 0;
690
691 if (!tool->ordered_events)
692 return -EINVAL;
693
694 ret = intel_bts_update_queues(bts);
695 if (ret < 0)
696 return ret;
697
698 return intel_bts_process_queues(bts, MAX_TIMESTAMP);
699}
700
/* Release per-queue decoder state; @priv may be NULL. */
static void intel_bts_free_queue(void *priv)
{
	/* free(NULL) is a no-op, so no separate NULL check is needed */
	free(priv);
}
709
710static void intel_bts_free_events(struct perf_session *session)
711{
712 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
713 auxtrace);
714 struct auxtrace_queues *queues = &bts->queues;
715 unsigned int i;
716
717 for (i = 0; i < queues->nr_queues; i++) {
718 intel_bts_free_queue(queues->queue_array[i].priv);
719 queues->queue_array[i].priv = NULL;
720 }
721 auxtrace_queues__free(queues);
722}
723
724static void intel_bts_free(struct perf_session *session)
725{
726 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
727 auxtrace);
728
729 auxtrace_heap__free(&bts->heap);
730 intel_bts_free_events(session);
731 session->auxtrace = NULL;
732 free(bts);
733}
734
735struct intel_bts_synth {
736 struct perf_tool dummy_tool;
737 struct perf_session *session;
738};
739
740static int intel_bts_event_synth(struct perf_tool *tool,
741 union perf_event *event,
742 struct perf_sample *sample __maybe_unused,
743 struct machine *machine __maybe_unused)
744{
745 struct intel_bts_synth *intel_bts_synth =
746 container_of(tool, struct intel_bts_synth, dummy_tool);
747
748 return perf_session__deliver_synth_event(intel_bts_synth->session,
749 event, NULL);
750}
751
752static int intel_bts_synth_event(struct perf_session *session,
753 struct perf_event_attr *attr, u64 id)
754{
755 struct intel_bts_synth intel_bts_synth;
756
757 memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
758 intel_bts_synth.session = session;
759
760 return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
761 &id, intel_bts_event_synth);
762}
763
764static int intel_bts_synth_events(struct intel_bts *bts,
765 struct perf_session *session)
766{
767 struct perf_evlist *evlist = session->evlist;
768 struct perf_evsel *evsel;
769 struct perf_event_attr attr;
770 bool found = false;
771 u64 id;
772 int err;
773
774 evlist__for_each(evlist, evsel) {
775 if (evsel->attr.type == bts->pmu_type && evsel->ids) {
776 found = true;
777 break;
778 }
779 }
780
781 if (!found) {
782 pr_debug("There are no selected events with Intel BTS data\n");
783 return 0;
784 }
785
786 memset(&attr, 0, sizeof(struct perf_event_attr));
787 attr.size = sizeof(struct perf_event_attr);
788 attr.type = PERF_TYPE_HARDWARE;
789 attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
790 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
791 PERF_SAMPLE_PERIOD;
792 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
793 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
794 attr.exclude_user = evsel->attr.exclude_user;
795 attr.exclude_kernel = evsel->attr.exclude_kernel;
796 attr.exclude_hv = evsel->attr.exclude_hv;
797 attr.exclude_host = evsel->attr.exclude_host;
798 attr.exclude_guest = evsel->attr.exclude_guest;
799 attr.sample_id_all = evsel->attr.sample_id_all;
800 attr.read_format = evsel->attr.read_format;
801
802 id = evsel->id[0] + 1000000000;
803 if (!id)
804 id = 1;
805
806 if (bts->synth_opts.branches) {
807 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
808 attr.sample_period = 1;
809 attr.sample_type |= PERF_SAMPLE_ADDR;
810 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
811 id, (u64)attr.sample_type);
812 err = intel_bts_synth_event(session, &attr, id);
813 if (err) {
814 pr_err("%s: failed to synthesize 'branches' event type\n",
815 __func__);
816 return err;
817 }
818 bts->sample_branches = true;
819 bts->branches_sample_type = attr.sample_type;
820 bts->branches_id = id;
821 /*
822 * We only use sample types from PERF_SAMPLE_MASK so we can use
823 * __perf_evsel__sample_size() here.
824 */
825 bts->branches_event_size = sizeof(struct sample_event) +
826 __perf_evsel__sample_size(attr.sample_type);
827 }
828
829 bts->synth_needs_swap = evsel->needs_swap;
830
831 return 0;
832}
833
834static const char * const intel_bts_info_fmts[] = {
835 [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n",
836 [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
837 [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
838 [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n",
839 [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
840 [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
841};
842
843static void intel_bts_print_info(u64 *arr, int start, int finish)
844{
845 int i;
846
847 if (!dump_trace)
848 return;
849
850 for (i = start; i <= finish; i++)
851 fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
852}
853
854u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];
855
856int intel_bts_process_auxtrace_info(union perf_event *event,
857 struct perf_session *session)
858{
859 struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
860 size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
861 struct intel_bts *bts;
862 int err;
863
864 if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
865 min_sz)
866 return -EINVAL;
867
868 bts = zalloc(sizeof(struct intel_bts));
869 if (!bts)
870 return -ENOMEM;
871
872 err = auxtrace_queues__init(&bts->queues);
873 if (err)
874 goto err_free;
875
876 bts->session = session;
877 bts->machine = &session->machines.host; /* No kvm support */
878 bts->auxtrace_type = auxtrace_info->type;
879 bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
880 bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
881 bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
882 bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
883 bts->cap_user_time_zero =
884 auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
885 bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];
886
887 bts->sampling_mode = false;
888
889 bts->auxtrace.process_event = intel_bts_process_event;
890 bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
891 bts->auxtrace.flush_events = intel_bts_flush;
892 bts->auxtrace.free_events = intel_bts_free_events;
893 bts->auxtrace.free = intel_bts_free;
894 session->auxtrace = &bts->auxtrace;
895
896 intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
897 INTEL_BTS_SNAPSHOT_MODE);
898
899 if (dump_trace)
900 return 0;
901
902 if (session->itrace_synth_opts && session->itrace_synth_opts->set)
903 bts->synth_opts = *session->itrace_synth_opts;
904 else
905 itrace_synth_opts__set_default(&bts->synth_opts);
906
907 if (bts->synth_opts.calls)
908 bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
909 PERF_IP_FLAG_TRACE_END;
910 if (bts->synth_opts.returns)
911 bts->branches_filter |= PERF_IP_FLAG_RETURN |
912 PERF_IP_FLAG_TRACE_BEGIN;
913
914 err = intel_bts_synth_events(bts, session);
915 if (err)
916 goto err_free_queues;
917
918 err = auxtrace_queues__process_index(&bts->queues, session);
919 if (err)
920 goto err_free_queues;
921
922 if (bts->queues.populated)
923 bts->data_queued = true;
924
925 return 0;
926
927err_free_queues:
928 auxtrace_queues__free(&bts->queues);
929 session->auxtrace = NULL;
930err_free:
931 free(bts);
932 return err;
933}