author    Adrian Hunter <adrian.hunter@intel.com>    2015-07-17 12:33:41 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2015-08-17 10:11:36 -0400
commit    90e457f7be0870052724b2d9c2c106e5847f2c19
tree      180c73f2e01bbc4211ae386d3368a09a1456d3f5
parent    f4aa081949e7b6b01e711229c5a47ee3482a169c
perf tools: Add Intel PT support
Add support for Intel Processor Trace. Intel PT support fits within the
new auxtrace infrastructure. Recording is supported by identifying the
Intel PT PMU, parsing options and setting up events. Decoding is
supported by queuing up trace data by cpu or thread and then decoding it
synchronously, delivering synthesized event samples into the session
processing for tools to consume.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1437150840-31811-7-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
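As a brief illustration (not part of the original commit message), a minimal usage
sketch, assuming the standard perf PMU event syntax and the default itrace decoding
options:

    # Record a workload with the Intel PT PMU event added by this patch
    perf record -e intel_pt// -- ls
    # Decode the AUX area data; samples are synthesized during session processing
    perf script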
-rw-r--r--  tools/perf/arch/x86/util/Build      |    2
-rw-r--r--  tools/perf/arch/x86/util/intel-pt.c |  752
-rw-r--r--  tools/perf/util/Build               |    1
-rw-r--r--  tools/perf/util/intel-pt.c          | 1911
-rw-r--r--  tools/perf/util/intel-pt.h          |   51
5 files changed, 2717 insertions(+), 0 deletions(-)
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index cfbccc4e3187..139608878888 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -6,3 +6,5 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o
 
 libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
 libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
new file mode 100644
index 000000000000..da7d2c15e611
--- /dev/null
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -0,0 +1,752 @@
1/*
2 * intel_pt.c: Intel Processor Trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <stdbool.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/bitops.h>
20#include <linux/log2.h>
21
22#include "../../perf.h"
23#include "../../util/session.h"
24#include "../../util/event.h"
25#include "../../util/evlist.h"
26#include "../../util/evsel.h"
27#include "../../util/cpumap.h"
28#include "../../util/parse-options.h"
29#include "../../util/parse-events.h"
30#include "../../util/pmu.h"
31#include "../../util/debug.h"
32#include "../../util/auxtrace.h"
33#include "../../util/tsc.h"
34#include "../../util/intel-pt.h"
35
36#define KiB(x) ((x) * 1024)
37#define MiB(x) ((x) * 1024 * 1024)
38#define KiB_MASK(x) (KiB(x) - 1)
39#define MiB_MASK(x) (MiB(x) - 1)
40
41#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4)
42
43#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60)
44
45#define INTEL_PT_PSB_PERIOD_NEAR 256
46
47struct intel_pt_snapshot_ref {
48 void *ref_buf;
49 size_t ref_offset;
50 bool wrapped;
51};
52
53struct intel_pt_recording {
54 struct auxtrace_record itr;
55 struct perf_pmu *intel_pt_pmu;
56 int have_sched_switch;
57 struct perf_evlist *evlist;
58 bool snapshot_mode;
59 bool snapshot_init_done;
60 size_t snapshot_size;
61 size_t snapshot_ref_buf_size;
62 int snapshot_ref_cnt;
63 struct intel_pt_snapshot_ref *snapshot_refs;
64};
65
66static int intel_pt_parse_terms_with_default(struct list_head *formats,
67 const char *str,
68 u64 *config)
69{
70 struct list_head *terms;
71 struct perf_event_attr attr = { .size = 0, };
72 int err;
73
74 terms = malloc(sizeof(struct list_head));
75 if (!terms)
76 return -ENOMEM;
77
78 INIT_LIST_HEAD(terms);
79
80 err = parse_events_terms(terms, str);
81 if (err)
82 goto out_free;
83
84 attr.config = *config;
85 err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
86 if (err)
87 goto out_free;
88
89 *config = attr.config;
90out_free:
91 parse_events__free_terms(terms);
92 return err;
93}
94
95static int intel_pt_parse_terms(struct list_head *formats, const char *str,
96 u64 *config)
97{
98 *config = 0;
99 return intel_pt_parse_terms_with_default(formats, str, config);
100}
101
102static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu __maybe_unused,
103 struct perf_evlist *evlist __maybe_unused)
104{
105 return 256;
106}
107
108static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
109{
110 u64 config;
111
112 intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &config);
113 return config;
114}
115
116static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
117 struct record_opts *opts,
118 const char *str)
119{
120 struct intel_pt_recording *ptr =
121 container_of(itr, struct intel_pt_recording, itr);
122 unsigned long long snapshot_size = 0;
123 char *endptr;
124
125 if (str) {
126 snapshot_size = strtoull(str, &endptr, 0);
127 if (*endptr || snapshot_size > SIZE_MAX)
128 return -1;
129 }
130
131 opts->auxtrace_snapshot_mode = true;
132 opts->auxtrace_snapshot_size = snapshot_size;
133
134 ptr->snapshot_size = snapshot_size;
135
136 return 0;
137}
138
139struct perf_event_attr *
140intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
141{
142 struct perf_event_attr *attr;
143
144 attr = zalloc(sizeof(struct perf_event_attr));
145 if (!attr)
146 return NULL;
147
148 attr->config = intel_pt_default_config(intel_pt_pmu);
149
150 intel_pt_pmu->selectable = true;
151
152 return attr;
153}
154
155static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused)
156{
157 return INTEL_PT_AUXTRACE_PRIV_SIZE;
158}
159
160static int intel_pt_info_fill(struct auxtrace_record *itr,
161 struct perf_session *session,
162 struct auxtrace_info_event *auxtrace_info,
163 size_t priv_size)
164{
165 struct intel_pt_recording *ptr =
166 container_of(itr, struct intel_pt_recording, itr);
167 struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
168 struct perf_event_mmap_page *pc;
169 struct perf_tsc_conversion tc = { .time_mult = 0, };
170 bool cap_user_time_zero = false, per_cpu_mmaps;
171 u64 tsc_bit, noretcomp_bit;
172 int err;
173
174 if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE)
175 return -EINVAL;
176
177 intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
178 intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
179 &noretcomp_bit);
180
181 if (!session->evlist->nr_mmaps)
182 return -EINVAL;
183
184 pc = session->evlist->mmap[0].base;
185 if (pc) {
186 err = perf_read_tsc_conversion(pc, &tc);
187 if (err) {
188 if (err != -EOPNOTSUPP)
189 return err;
190 } else {
191 cap_user_time_zero = tc.time_mult != 0;
192 }
193 if (!cap_user_time_zero)
194 ui__warning("Intel Processor Trace: TSC not available\n");
195 }
196
197 per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus);
198
199 auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
200 auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
201 auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
202 auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
203 auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
204 auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
205 auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
206 auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
207 auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
208 auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
209 auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
210
211 return 0;
212}
213
214static int intel_pt_track_switches(struct perf_evlist *evlist)
215{
216 const char *sched_switch = "sched:sched_switch";
217 struct perf_evsel *evsel;
218 int err;
219
220 if (!perf_evlist__can_select_event(evlist, sched_switch))
221 return -EPERM;
222
223 err = parse_events(evlist, sched_switch, NULL);
224 if (err) {
225 pr_debug2("%s: failed to parse %s, error %d\n",
226 __func__, sched_switch, err);
227 return err;
228 }
229
230 evsel = perf_evlist__last(evlist);
231
232 perf_evsel__set_sample_bit(evsel, CPU);
233 perf_evsel__set_sample_bit(evsel, TIME);
234
235 evsel->system_wide = true;
236 evsel->no_aux_samples = true;
237 evsel->immediate = true;
238
239 return 0;
240}
241
242static int intel_pt_recording_options(struct auxtrace_record *itr,
243 struct perf_evlist *evlist,
244 struct record_opts *opts)
245{
246 struct intel_pt_recording *ptr =
247 container_of(itr, struct intel_pt_recording, itr);
248 struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
249 bool have_timing_info;
250 struct perf_evsel *evsel, *intel_pt_evsel = NULL;
251 const struct cpu_map *cpus = evlist->cpus;
252 bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
253 u64 tsc_bit;
254
255 ptr->evlist = evlist;
256 ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
257
258 evlist__for_each(evlist, evsel) {
259 if (evsel->attr.type == intel_pt_pmu->type) {
260 if (intel_pt_evsel) {
261 pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
262 return -EINVAL;
263 }
264 evsel->attr.freq = 0;
265 evsel->attr.sample_period = 1;
266 intel_pt_evsel = evsel;
267 opts->full_auxtrace = true;
268 }
269 }
270
271 if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
272 pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
273 return -EINVAL;
274 }
275
276 if (opts->use_clockid) {
277 pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
278 return -EINVAL;
279 }
280
281 if (!opts->full_auxtrace)
282 return 0;
283
284 /* Set default sizes for snapshot mode */
285 if (opts->auxtrace_snapshot_mode) {
286 size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
287
288 if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
289 if (privileged) {
290 opts->auxtrace_mmap_pages = MiB(4) / page_size;
291 } else {
292 opts->auxtrace_mmap_pages = KiB(128) / page_size;
293 if (opts->mmap_pages == UINT_MAX)
294 opts->mmap_pages = KiB(256) / page_size;
295 }
296 } else if (!opts->auxtrace_mmap_pages && !privileged &&
297 opts->mmap_pages == UINT_MAX) {
298 opts->mmap_pages = KiB(256) / page_size;
299 }
300 if (!opts->auxtrace_snapshot_size)
301 opts->auxtrace_snapshot_size =
302 opts->auxtrace_mmap_pages * (size_t)page_size;
303 if (!opts->auxtrace_mmap_pages) {
304 size_t sz = opts->auxtrace_snapshot_size;
305
306 sz = round_up(sz, page_size) / page_size;
307 opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
308 }
309 if (opts->auxtrace_snapshot_size >
310 opts->auxtrace_mmap_pages * (size_t)page_size) {
311 pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
312 opts->auxtrace_snapshot_size,
313 opts->auxtrace_mmap_pages * (size_t)page_size);
314 return -EINVAL;
315 }
316 if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
317 pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
318 return -EINVAL;
319 }
320 pr_debug2("Intel PT snapshot size: %zu\n",
321 opts->auxtrace_snapshot_size);
322 if (psb_period &&
323 opts->auxtrace_snapshot_size <= psb_period +
324 INTEL_PT_PSB_PERIOD_NEAR)
325 ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
326 opts->auxtrace_snapshot_size, psb_period);
327 }
328
329 /* Set default sizes for full trace mode */
330 if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
331 if (privileged) {
332 opts->auxtrace_mmap_pages = MiB(4) / page_size;
333 } else {
334 opts->auxtrace_mmap_pages = KiB(128) / page_size;
335 if (opts->mmap_pages == UINT_MAX)
336 opts->mmap_pages = KiB(256) / page_size;
337 }
338 }
339
340 /* Validate auxtrace_mmap_pages */
341 if (opts->auxtrace_mmap_pages) {
342 size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
343 size_t min_sz;
344
345 if (opts->auxtrace_snapshot_mode)
346 min_sz = KiB(4);
347 else
348 min_sz = KiB(8);
349
350 if (sz < min_sz || !is_power_of_2(sz)) {
351 pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
352 min_sz / 1024);
353 return -EINVAL;
354 }
355 }
356
357 intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
358
359 if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit))
360 have_timing_info = true;
361 else
362 have_timing_info = false;
363
364 /*
365 * Per-cpu recording needs sched_switch events to distinguish different
366 * threads.
367 */
368 if (have_timing_info && !cpu_map__empty(cpus)) {
369 int err;
370
371 err = intel_pt_track_switches(evlist);
372 if (err == -EPERM)
373 pr_debug2("Unable to select sched:sched_switch\n");
374 else if (err)
375 return err;
376 else
377 ptr->have_sched_switch = 1;
378 }
379
380 if (intel_pt_evsel) {
381 /*
382 * To obtain the auxtrace buffer file descriptor, the auxtrace
383 * event must come first.
384 */
385 perf_evlist__to_front(evlist, intel_pt_evsel);
386 /*
387 * In the case of per-cpu mmaps, we need the CPU on the
388 * AUX event.
389 */
390 if (!cpu_map__empty(cpus))
391 perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
392 }
393
394 /* Add dummy event to keep tracking */
395 if (opts->full_auxtrace) {
396 struct perf_evsel *tracking_evsel;
397 int err;
398
399 err = parse_events(evlist, "dummy:u", NULL);
400 if (err)
401 return err;
402
403 tracking_evsel = perf_evlist__last(evlist);
404
405 perf_evlist__set_tracking_event(evlist, tracking_evsel);
406
407 tracking_evsel->attr.freq = 0;
408 tracking_evsel->attr.sample_period = 1;
409
410 /* In per-cpu case, always need the time of mmap events etc */
411 if (!cpu_map__empty(cpus))
412 perf_evsel__set_sample_bit(tracking_evsel, TIME);
413 }
414
415 /*
416 * Warn the user when we do not have enough information to decode i.e.
417 * per-cpu with no sched_switch (except workload-only).
418 */
419 if (!ptr->have_sched_switch && !cpu_map__empty(cpus) &&
420 !target__none(&opts->target))
421 ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
422
423 return 0;
424}
425
426static int intel_pt_snapshot_start(struct auxtrace_record *itr)
427{
428 struct intel_pt_recording *ptr =
429 container_of(itr, struct intel_pt_recording, itr);
430 struct perf_evsel *evsel;
431
432 evlist__for_each(ptr->evlist, evsel) {
433 if (evsel->attr.type == ptr->intel_pt_pmu->type)
434 return perf_evlist__disable_event(ptr->evlist, evsel);
435 }
436 return -EINVAL;
437}
438
439static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
440{
441 struct intel_pt_recording *ptr =
442 container_of(itr, struct intel_pt_recording, itr);
443 struct perf_evsel *evsel;
444
445 evlist__for_each(ptr->evlist, evsel) {
446 if (evsel->attr.type == ptr->intel_pt_pmu->type)
447 return perf_evlist__enable_event(ptr->evlist, evsel);
448 }
449 return -EINVAL;
450}
451
452static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
453{
454 const size_t sz = sizeof(struct intel_pt_snapshot_ref);
455 int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
456 struct intel_pt_snapshot_ref *refs;
457
458 if (!new_cnt)
459 new_cnt = 16;
460
461 while (new_cnt <= idx)
462 new_cnt *= 2;
463
464 refs = calloc(new_cnt, sz);
465 if (!refs)
466 return -ENOMEM;
467
468 memcpy(refs, ptr->snapshot_refs, cnt * sz);
469
470 ptr->snapshot_refs = refs;
471 ptr->snapshot_ref_cnt = new_cnt;
472
473 return 0;
474}
475
476static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
477{
478 int i;
479
480 for (i = 0; i < ptr->snapshot_ref_cnt; i++)
481 zfree(&ptr->snapshot_refs[i].ref_buf);
482 zfree(&ptr->snapshot_refs);
483}
484
485static void intel_pt_recording_free(struct auxtrace_record *itr)
486{
487 struct intel_pt_recording *ptr =
488 container_of(itr, struct intel_pt_recording, itr);
489
490 intel_pt_free_snapshot_refs(ptr);
491 free(ptr);
492}
493
494static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
495 size_t snapshot_buf_size)
496{
497 size_t ref_buf_size = ptr->snapshot_ref_buf_size;
498 void *ref_buf;
499
500 ref_buf = zalloc(ref_buf_size);
501 if (!ref_buf)
502 return -ENOMEM;
503
504 ptr->snapshot_refs[idx].ref_buf = ref_buf;
505 ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;
506
507 return 0;
508}
509
510static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
511 size_t snapshot_buf_size)
512{
513 const size_t max_size = 256 * 1024;
514 size_t buf_size = 0, psb_period;
515
516 if (ptr->snapshot_size <= 64 * 1024)
517 return 0;
518
519 psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
520 if (psb_period)
521 buf_size = psb_period * 2;
522
523 if (!buf_size || buf_size > max_size)
524 buf_size = max_size;
525
526 if (buf_size >= snapshot_buf_size)
527 return 0;
528
529 if (buf_size >= ptr->snapshot_size / 2)
530 return 0;
531
532 return buf_size;
533}
534
535static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
536 size_t snapshot_buf_size)
537{
538 if (ptr->snapshot_init_done)
539 return 0;
540
541 ptr->snapshot_init_done = true;
542
543 ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
544 snapshot_buf_size);
545
546 return 0;
547}
548
549/**
550 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
551 * @buf1: first buffer
552 * @compare_size: number of bytes to compare
553 * @buf2: second buffer (a circular buffer)
554 * @offs2: offset in second buffer
555 * @buf2_size: size of second buffer
556 *
557 * The comparison allows for the possibility that the bytes to compare in the
558 * circular buffer are not contiguous. It is assumed that @compare_size <=
559 * @buf2_size. This function returns %false if the bytes are identical, %true
560 * otherwise.
561 */
562static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
563 void *buf2, size_t offs2, size_t buf2_size)
564{
565 size_t end2 = offs2 + compare_size, part_size;
566
567 if (end2 <= buf2_size)
568 return memcmp(buf1, buf2 + offs2, compare_size);
569
570 part_size = end2 - buf2_size;
571 if (memcmp(buf1, buf2 + offs2, part_size))
572 return true;
573
574 compare_size -= part_size;
575
576 return memcmp(buf1 + part_size, buf2, compare_size);
577}
578
579static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
580 size_t ref_size, size_t buf_size,
581 void *data, size_t head)
582{
583 size_t ref_end = ref_offset + ref_size;
584
585 if (ref_end > buf_size) {
586 if (head > ref_offset || head < ref_end - buf_size)
587 return true;
588 } else if (head > ref_offset && head < ref_end) {
589 return true;
590 }
591
592 return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
593 buf_size);
594}
595
596static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
597 void *data, size_t head)
598{
599 if (head >= ref_size) {
600 memcpy(ref_buf, data + head - ref_size, ref_size);
601 } else {
602 memcpy(ref_buf, data, head);
603 ref_size -= head;
604 memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
605 }
606}
607
608static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
609 struct auxtrace_mmap *mm, unsigned char *data,
610 u64 head)
611{
612 struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
613 bool wrapped;
614
615 wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
616 ptr->snapshot_ref_buf_size, mm->len,
617 data, head);
618
619 intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
620 data, head);
621
622 return wrapped;
623}
624
625static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
626{
627 int i, a, b;
628
629 b = buf_size >> 3;
630 a = b - 512;
631 if (a < 0)
632 a = 0;
633
634 for (i = a; i < b; i++) {
635 if (data[i])
636 return true;
637 }
638
639 return false;
640}
641
642static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
643 struct auxtrace_mmap *mm, unsigned char *data,
644 u64 *head, u64 *old)
645{
646 struct intel_pt_recording *ptr =
647 container_of(itr, struct intel_pt_recording, itr);
648 bool wrapped;
649 int err;
650
651 pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
652 __func__, idx, (size_t)*old, (size_t)*head);
653
654 err = intel_pt_snapshot_init(ptr, mm->len);
655 if (err)
656 goto out_err;
657
658 if (idx >= ptr->snapshot_ref_cnt) {
659 err = intel_pt_alloc_snapshot_refs(ptr, idx);
660 if (err)
661 goto out_err;
662 }
663
664 if (ptr->snapshot_ref_buf_size) {
665 if (!ptr->snapshot_refs[idx].ref_buf) {
666 err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
667 if (err)
668 goto out_err;
669 }
670 wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
671 } else {
672 wrapped = ptr->snapshot_refs[idx].wrapped;
673 if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
674 ptr->snapshot_refs[idx].wrapped = true;
675 wrapped = true;
676 }
677 }
678
679 /*
680 * In full trace mode 'head' continually increases. However in snapshot
681 * mode 'head' is an offset within the buffer. Here 'old' and 'head'
682 * are adjusted to match the full trace case which expects that 'old' is
683 * always less than 'head'.
684 */
685 if (wrapped) {
686 *old = *head;
687 *head += mm->len;
688 } else {
689 if (mm->mask)
690 *old &= mm->mask;
691 else
692 *old %= mm->len;
693 if (*old > *head)
694 *head += mm->len;
695 }
696
697 pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
698 __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
699
700 return 0;
701
702out_err:
703 pr_err("%s: failed, error %d\n", __func__, err);
704 return err;
705}
706
707static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
708{
709 return rdtsc();
710}
711
712static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
713{
714 struct intel_pt_recording *ptr =
715 container_of(itr, struct intel_pt_recording, itr);
716 struct perf_evsel *evsel;
717
718 evlist__for_each(ptr->evlist, evsel) {
719 if (evsel->attr.type == ptr->intel_pt_pmu->type)
720 return perf_evlist__enable_event_idx(ptr->evlist, evsel,
721 idx);
722 }
723 return -EINVAL;
724}
725
726struct auxtrace_record *intel_pt_recording_init(int *err)
727{
728 struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
729 struct intel_pt_recording *ptr;
730
731 if (!intel_pt_pmu)
732 return NULL;
733
734 ptr = zalloc(sizeof(struct intel_pt_recording));
735 if (!ptr) {
736 *err = -ENOMEM;
737 return NULL;
738 }
739
740 ptr->intel_pt_pmu = intel_pt_pmu;
741 ptr->itr.recording_options = intel_pt_recording_options;
742 ptr->itr.info_priv_size = intel_pt_info_priv_size;
743 ptr->itr.info_fill = intel_pt_info_fill;
744 ptr->itr.free = intel_pt_recording_free;
745 ptr->itr.snapshot_start = intel_pt_snapshot_start;
746 ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
747 ptr->itr.find_snapshot = intel_pt_find_snapshot;
748 ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
749 ptr->itr.reference = intel_pt_reference;
750 ptr->itr.read_finish = intel_pt_read_finish;
751 return &ptr->itr;
752}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 615ca12c2e44..c20473d1369e 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -79,6 +79,7 @@ libperf-y += cloexec.o
 libperf-y += thread-stack.o
 libperf-$(CONFIG_AUXTRACE) += auxtrace.o
 libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o
 libperf-y += parse-branch-options.o
 
 libperf-$(CONFIG_LIBELF) += symbol-elf.o
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
new file mode 100644
index 000000000000..2a4a4120473b
--- /dev/null
+++ b/tools/perf/util/intel-pt.c
@@ -0,0 +1,1911 @@
1/*
2 * intel_pt.c: Intel Processor Trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <stdio.h>
17#include <stdbool.h>
18#include <errno.h>
19#include <linux/kernel.h>
20#include <linux/types.h>
21
22#include "../perf.h"
23#include "session.h"
24#include "machine.h"
25#include "tool.h"
26#include "event.h"
27#include "evlist.h"
28#include "evsel.h"
29#include "map.h"
30#include "color.h"
31#include "util.h"
32#include "thread.h"
33#include "thread-stack.h"
34#include "symbol.h"
35#include "callchain.h"
36#include "dso.h"
37#include "debug.h"
38#include "auxtrace.h"
39#include "tsc.h"
40#include "intel-pt.h"
41
42#include "intel-pt-decoder/intel-pt-log.h"
43#include "intel-pt-decoder/intel-pt-decoder.h"
44#include "intel-pt-decoder/intel-pt-insn-decoder.h"
45#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
46
47#define MAX_TIMESTAMP (~0ULL)
48
49struct intel_pt {
50 struct auxtrace auxtrace;
51 struct auxtrace_queues queues;
52 struct auxtrace_heap heap;
53 u32 auxtrace_type;
54 struct perf_session *session;
55 struct machine *machine;
56 struct perf_evsel *switch_evsel;
57 struct thread *unknown_thread;
58 bool timeless_decoding;
59 bool sampling_mode;
60 bool snapshot_mode;
61 bool per_cpu_mmaps;
62 bool have_tsc;
63 bool data_queued;
64 bool est_tsc;
65 bool sync_switch;
66 int have_sched_switch;
67 u32 pmu_type;
68 u64 kernel_start;
69 u64 switch_ip;
70 u64 ptss_ip;
71
72 struct perf_tsc_conversion tc;
73 bool cap_user_time_zero;
74
75 struct itrace_synth_opts synth_opts;
76
77 bool sample_instructions;
78 u64 instructions_sample_type;
79 u64 instructions_sample_period;
80 u64 instructions_id;
81
82 bool sample_branches;
83 u32 branches_filter;
84 u64 branches_sample_type;
85 u64 branches_id;
86
87 bool sample_transactions;
88 u64 transactions_sample_type;
89 u64 transactions_id;
90
91 bool synth_needs_swap;
92
93 u64 tsc_bit;
94 u64 noretcomp_bit;
95 unsigned max_non_turbo_ratio;
96};
97
98enum switch_state {
99 INTEL_PT_SS_NOT_TRACING,
100 INTEL_PT_SS_UNKNOWN,
101 INTEL_PT_SS_TRACING,
102 INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
103 INTEL_PT_SS_EXPECTING_SWITCH_IP,
104};
105
106struct intel_pt_queue {
107 struct intel_pt *pt;
108 unsigned int queue_nr;
109 struct auxtrace_buffer *buffer;
110 void *decoder;
111 const struct intel_pt_state *state;
112 struct ip_callchain *chain;
113 union perf_event *event_buf;
114 bool on_heap;
115 bool stop;
116 bool step_through_buffers;
117 bool use_buffer_pid_tid;
118 pid_t pid, tid;
119 int cpu;
120 int switch_state;
121 pid_t next_tid;
122 struct thread *thread;
123 bool exclude_kernel;
124 bool have_sample;
125 u64 time;
126 u64 timestamp;
127 u32 flags;
128 u16 insn_len;
129};
130
131static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
132 unsigned char *buf, size_t len)
133{
134 struct intel_pt_pkt packet;
135 size_t pos = 0;
136 int ret, pkt_len, i;
137 char desc[INTEL_PT_PKT_DESC_MAX];
138 const char *color = PERF_COLOR_BLUE;
139
140 color_fprintf(stdout, color,
141 ". ... Intel Processor Trace data: size %zu bytes\n",
142 len);
143
144 while (len) {
145 ret = intel_pt_get_packet(buf, len, &packet);
146 if (ret > 0)
147 pkt_len = ret;
148 else
149 pkt_len = 1;
150 printf(".");
151 color_fprintf(stdout, color, " %08x: ", pos);
152 for (i = 0; i < pkt_len; i++)
153 color_fprintf(stdout, color, " %02x", buf[i]);
154 for (; i < 16; i++)
155 color_fprintf(stdout, color, " ");
156 if (ret > 0) {
157 ret = intel_pt_pkt_desc(&packet, desc,
158 INTEL_PT_PKT_DESC_MAX);
159 if (ret > 0)
160 color_fprintf(stdout, color, " %s\n", desc);
161 } else {
162 color_fprintf(stdout, color, " Bad packet!\n");
163 }
164 pos += pkt_len;
165 buf += pkt_len;
166 len -= pkt_len;
167 }
168}
169
170static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
171 size_t len)
172{
173 printf(".\n");
174 intel_pt_dump(pt, buf, len);
175}
176
177static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
178 struct auxtrace_buffer *b)
179{
180 void *start;
181
182 start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
183 pt->have_tsc);
184 if (!start)
185 return -EINVAL;
186 b->use_size = b->data + b->size - start;
187 b->use_data = start;
188 return 0;
189}
190
191static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
192 struct auxtrace_queue *queue,
193 struct auxtrace_buffer *buffer)
194{
195 if (queue->cpu == -1 && buffer->cpu != -1)
196 ptq->cpu = buffer->cpu;
197
198 ptq->pid = buffer->pid;
199 ptq->tid = buffer->tid;
200
201 intel_pt_log("queue %u cpu %d pid %d tid %d\n",
202 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
203
204 thread__zput(ptq->thread);
205
206 if (ptq->tid != -1) {
207 if (ptq->pid != -1)
208 ptq->thread = machine__findnew_thread(ptq->pt->machine,
209 ptq->pid,
210 ptq->tid);
211 else
212 ptq->thread = machine__find_thread(ptq->pt->machine, -1,
213 ptq->tid);
214 }
215}
216
217/* This function assumes data is processed sequentially only */
218static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
219{
220 struct intel_pt_queue *ptq = data;
221 struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
222 struct auxtrace_queue *queue;
223
224 if (ptq->stop) {
225 b->len = 0;
226 return 0;
227 }
228
229 queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
230
231 buffer = auxtrace_buffer__next(queue, buffer);
232 if (!buffer) {
233 if (old_buffer)
234 auxtrace_buffer__drop_data(old_buffer);
235 b->len = 0;
236 return 0;
237 }
238
239 ptq->buffer = buffer;
240
241 if (!buffer->data) {
242 int fd = perf_data_file__fd(ptq->pt->session->file);
243
244 buffer->data = auxtrace_buffer__get_data(buffer, fd);
245 if (!buffer->data)
246 return -ENOMEM;
247 }
248
249 if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
250 intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
251 return -ENOMEM;
252
253 if (old_buffer)
254 auxtrace_buffer__drop_data(old_buffer);
255
256 if (buffer->use_data) {
257 b->len = buffer->use_size;
258 b->buf = buffer->use_data;
259 } else {
260 b->len = buffer->size;
261 b->buf = buffer->data;
262 }
263 b->ref_timestamp = buffer->reference;
264
265 if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
266 !buffer->consecutive)) {
267 b->consecutive = false;
268 b->trace_nr = buffer->buffer_nr + 1;
269 } else {
270 b->consecutive = true;
271 }
272
273 if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
274 ptq->tid != buffer->tid))
275 intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
276
277 if (ptq->step_through_buffers)
278 ptq->stop = true;
279
280 if (!b->len)
281 return intel_pt_get_trace(b, data);
282
283 return 0;
284}
285
286struct intel_pt_cache_entry {
287 struct auxtrace_cache_entry entry;
288 u64 insn_cnt;
289 u64 byte_cnt;
290 enum intel_pt_insn_op op;
291 enum intel_pt_insn_branch branch;
292 int length;
293 int32_t rel;
294};
295
296static int intel_pt_config_div(const char *var, const char *value, void *data)
297{
298 int *d = data;
299 long val;
300
301 if (!strcmp(var, "intel-pt.cache-divisor")) {
302 val = strtol(value, NULL, 0);
303 if (val > 0 && val <= INT_MAX)
304 *d = val;
305 }
306
307 return 0;
308}
309
310static int intel_pt_cache_divisor(void)
311{
312 static int d;
313
314 if (d)
315 return d;
316
317 perf_config(intel_pt_config_div, &d);
318
319 if (!d)
320 d = 64;
321
322 return d;
323}
324
325static unsigned int intel_pt_cache_size(struct dso *dso,
326 struct machine *machine)
327{
328 off_t size;
329
330 size = dso__data_size(dso, machine);
331 size /= intel_pt_cache_divisor();
332 if (size < 1000)
333 return 10;
334 if (size > (1 << 21))
335 return 21;
336 return 32 - __builtin_clz(size);
337}
338
339static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
340 struct machine *machine)
341{
342 struct auxtrace_cache *c;
343 unsigned int bits;
344
345 if (dso->auxtrace_cache)
346 return dso->auxtrace_cache;
347
348 bits = intel_pt_cache_size(dso, machine);
349
350 /* Ignoring cache creation failure */
351 c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
352
353 dso->auxtrace_cache = c;
354
355 return c;
356}
357
358static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
359 u64 offset, u64 insn_cnt, u64 byte_cnt,
360 struct intel_pt_insn *intel_pt_insn)
361{
362 struct auxtrace_cache *c = intel_pt_cache(dso, machine);
363 struct intel_pt_cache_entry *e;
364 int err;
365
366 if (!c)
367 return -ENOMEM;
368
369 e = auxtrace_cache__alloc_entry(c);
370 if (!e)
371 return -ENOMEM;
372
373 e->insn_cnt = insn_cnt;
374 e->byte_cnt = byte_cnt;
375 e->op = intel_pt_insn->op;
376 e->branch = intel_pt_insn->branch;
377 e->length = intel_pt_insn->length;
378 e->rel = intel_pt_insn->rel;
379
380 err = auxtrace_cache__add(c, offset, &e->entry);
381 if (err)
382 auxtrace_cache__free_entry(c, e);
383
384 return err;
385}
386
387static struct intel_pt_cache_entry *
388intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
389{
390 struct auxtrace_cache *c = intel_pt_cache(dso, machine);
391
392 if (!c)
393 return NULL;
394
395 return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
396}
397
398static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
399 uint64_t *insn_cnt_ptr, uint64_t *ip,
400 uint64_t to_ip, uint64_t max_insn_cnt,
401 void *data)
402{
403 struct intel_pt_queue *ptq = data;
404 struct machine *machine = ptq->pt->machine;
405 struct thread *thread;
406 struct addr_location al;
407 unsigned char buf[1024];
408 size_t bufsz;
409 ssize_t len;
410 int x86_64;
411 u8 cpumode;
412 u64 offset, start_offset, start_ip;
413 u64 insn_cnt = 0;
414 bool one_map = true;
415
416 if (to_ip && *ip == to_ip)
417 goto out_no_cache;
418
419 bufsz = intel_pt_insn_max_size();
420
421 if (*ip >= ptq->pt->kernel_start)
422 cpumode = PERF_RECORD_MISC_KERNEL;
423 else
424 cpumode = PERF_RECORD_MISC_USER;
425
426 thread = ptq->thread;
427 if (!thread) {
428 if (cpumode != PERF_RECORD_MISC_KERNEL)
429 return -EINVAL;
430 thread = ptq->pt->unknown_thread;
431 }
432
433 while (1) {
434 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
435 if (!al.map || !al.map->dso)
436 return -EINVAL;
437
438 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
439 dso__data_status_seen(al.map->dso,
440 DSO_DATA_STATUS_SEEN_ITRACE))
441 return -ENOENT;
442
443 offset = al.map->map_ip(al.map, *ip);
444
445 if (!to_ip && one_map) {
446 struct intel_pt_cache_entry *e;
447
448 e = intel_pt_cache_lookup(al.map->dso, machine, offset);
449 if (e &&
450 (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
451 *insn_cnt_ptr = e->insn_cnt;
452 *ip += e->byte_cnt;
453 intel_pt_insn->op = e->op;
454 intel_pt_insn->branch = e->branch;
455 intel_pt_insn->length = e->length;
456 intel_pt_insn->rel = e->rel;
457 intel_pt_log_insn_no_data(intel_pt_insn, *ip);
458 return 0;
459 }
460 }
461
462 start_offset = offset;
463 start_ip = *ip;
464
465 /* Load maps to ensure dso->is_64_bit has been updated */
466 map__load(al.map, machine->symbol_filter);
467
468 x86_64 = al.map->dso->is_64_bit;
469
470 while (1) {
471 len = dso__data_read_offset(al.map->dso, machine,
472 offset, buf, bufsz);
473 if (len <= 0)
474 return -EINVAL;
475
476 if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
477 return -EINVAL;
478
479 intel_pt_log_insn(intel_pt_insn, *ip);
480
481 insn_cnt += 1;
482
483 if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
484 goto out;
485
486 if (max_insn_cnt && insn_cnt >= max_insn_cnt)
487 goto out_no_cache;
488
489 *ip += intel_pt_insn->length;
490
491 if (to_ip && *ip == to_ip)
492 goto out_no_cache;
493
494 if (*ip >= al.map->end)
495 break;
496
497 offset += intel_pt_insn->length;
498 }
499 one_map = false;
500 }
501out:
502 *insn_cnt_ptr = insn_cnt;
503
504 if (!one_map)
505 goto out_no_cache;
506
507 /*
508 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
509 * entries.
510 */
511 if (to_ip) {
512 struct intel_pt_cache_entry *e;
513
514 e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
515 if (e)
516 return 0;
517 }
518
519 /* Ignore cache errors */
520 intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
521 *ip - start_ip, intel_pt_insn);
522
523 return 0;
524
525out_no_cache:
526 *insn_cnt_ptr = insn_cnt;
527 return 0;
528}
529
530static bool intel_pt_get_config(struct intel_pt *pt,
531 struct perf_event_attr *attr, u64 *config)
532{
533 if (attr->type == pt->pmu_type) {
534 if (config)
535 *config = attr->config;
536 return true;
537 }
538
539 return false;
540}
541
542static bool intel_pt_exclude_kernel(struct intel_pt *pt)
543{
544 struct perf_evsel *evsel;
545
546 evlist__for_each(pt->session->evlist, evsel) {
547 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
548 !evsel->attr.exclude_kernel)
549 return false;
550 }
551 return true;
552}
553
554static bool intel_pt_return_compression(struct intel_pt *pt)
555{
556 struct perf_evsel *evsel;
557 u64 config;
558
559 if (!pt->noretcomp_bit)
560 return true;
561
562 evlist__for_each(pt->session->evlist, evsel) {
563 if (intel_pt_get_config(pt, &evsel->attr, &config) &&
564 (config & pt->noretcomp_bit))
565 return false;
566 }
567 return true;
568}
569
570static bool intel_pt_timeless_decoding(struct intel_pt *pt)
571{
572 struct perf_evsel *evsel;
573 bool timeless_decoding = true;
574 u64 config;
575
576 if (!pt->tsc_bit || !pt->cap_user_time_zero)
577 return true;
578
579 evlist__for_each(pt->session->evlist, evsel) {
580 if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
581 return true;
582 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
583 if (config & pt->tsc_bit)
584 timeless_decoding = false;
585 else
586 return true;
587 }
588 }
589 return timeless_decoding;
590}
591
592static bool intel_pt_tracing_kernel(struct intel_pt *pt)
593{
594 struct perf_evsel *evsel;
595
596 evlist__for_each(pt->session->evlist, evsel) {
597 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
598 !evsel->attr.exclude_kernel)
599 return true;
600 }
601 return false;
602}
603
604static bool intel_pt_have_tsc(struct intel_pt *pt)
605{
606 struct perf_evsel *evsel;
607 bool have_tsc = false;
608 u64 config;
609
610 if (!pt->tsc_bit)
611 return false;
612
613 evlist__for_each(pt->session->evlist, evsel) {
614 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
615 if (config & pt->tsc_bit)
616 have_tsc = true;
617 else
618 return false;
619 }
620 }
621 return have_tsc;
622}
623
624static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
625{
626 u64 quot, rem;
627
628 quot = ns / pt->tc.time_mult;
629 rem = ns % pt->tc.time_mult;
630 return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
631 pt->tc.time_mult;
632}
633
634static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
635 unsigned int queue_nr)
636{
637 struct intel_pt_params params = { .get_trace = 0, };
638 struct intel_pt_queue *ptq;
639
640 ptq = zalloc(sizeof(struct intel_pt_queue));
641 if (!ptq)
642 return NULL;
643
644 if (pt->synth_opts.callchain) {
645 size_t sz = sizeof(struct ip_callchain);
646
647 sz += pt->synth_opts.callchain_sz * sizeof(u64);
648 ptq->chain = zalloc(sz);
649 if (!ptq->chain)
650 goto out_free;
651 }
652
653 ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
654 if (!ptq->event_buf)
655 goto out_free;
656
657 ptq->pt = pt;
658 ptq->queue_nr = queue_nr;
659 ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
660 ptq->pid = -1;
661 ptq->tid = -1;
662 ptq->cpu = -1;
663 ptq->next_tid = -1;
664
665 params.get_trace = intel_pt_get_trace;
666 params.walk_insn = intel_pt_walk_next_insn;
667 params.data = ptq;
668 params.return_compression = intel_pt_return_compression(pt);
669 params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
670
671 if (pt->synth_opts.instructions) {
672 if (pt->synth_opts.period) {
673 switch (pt->synth_opts.period_type) {
674 case PERF_ITRACE_PERIOD_INSTRUCTIONS:
675 params.period_type =
676 INTEL_PT_PERIOD_INSTRUCTIONS;
677 params.period = pt->synth_opts.period;
678 break;
679 case PERF_ITRACE_PERIOD_TICKS:
680 params.period_type = INTEL_PT_PERIOD_TICKS;
681 params.period = pt->synth_opts.period;
682 break;
683 case PERF_ITRACE_PERIOD_NANOSECS:
684 params.period_type = INTEL_PT_PERIOD_TICKS;
685 params.period = intel_pt_ns_to_ticks(pt,
686 pt->synth_opts.period);
687 break;
688 default:
689 break;
690 }
691 }
692
693 if (!params.period) {
694 params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
695 params.period = 1000;
696 }
697 }
698
699 ptq->decoder = intel_pt_decoder_new(&params);
700 if (!ptq->decoder)
701 goto out_free;
702
703 return ptq;
704
705out_free:
706 zfree(&ptq->event_buf);
707 zfree(&ptq->chain);
708 free(ptq);
709 return NULL;
710}
711
712static void intel_pt_free_queue(void *priv)
713{
714 struct intel_pt_queue *ptq = priv;
715
716 if (!ptq)
717 return;
718 thread__zput(ptq->thread);
719 intel_pt_decoder_free(ptq->decoder);
720 zfree(&ptq->event_buf);
721 zfree(&ptq->chain);
722 free(ptq);
723}
724
725static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
726 struct auxtrace_queue *queue)
727{
728 struct intel_pt_queue *ptq = queue->priv;
729
730 if (queue->tid == -1 || pt->have_sched_switch) {
731 ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
732 thread__zput(ptq->thread);
733 }
734
735 if (!ptq->thread && ptq->tid != -1)
736 ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
737
738 if (ptq->thread) {
739 ptq->pid = ptq->thread->pid_;
740 if (queue->cpu == -1)
741 ptq->cpu = ptq->thread->cpu;
742 }
743}
744
745static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
746{
747 if (ptq->state->flags & INTEL_PT_ABORT_TX) {
748 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
749 } else if (ptq->state->flags & INTEL_PT_ASYNC) {
750 if (ptq->state->to_ip)
751 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
752 PERF_IP_FLAG_ASYNC |
753 PERF_IP_FLAG_INTERRUPT;
754 else
755 ptq->flags = PERF_IP_FLAG_BRANCH |
756 PERF_IP_FLAG_TRACE_END;
757 ptq->insn_len = 0;
758 } else {
759 if (ptq->state->from_ip)
760 ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
761 else
762 ptq->flags = PERF_IP_FLAG_BRANCH |
763 PERF_IP_FLAG_TRACE_BEGIN;
764 if (ptq->state->flags & INTEL_PT_IN_TX)
765 ptq->flags |= PERF_IP_FLAG_IN_TX;
766 ptq->insn_len = ptq->state->insn_len;
767 }
768}
769
770static int intel_pt_setup_queue(struct intel_pt *pt,
771 struct auxtrace_queue *queue,
772 unsigned int queue_nr)
773{
774 struct intel_pt_queue *ptq = queue->priv;
775
776 if (list_empty(&queue->head))
777 return 0;
778
779 if (!ptq) {
780 ptq = intel_pt_alloc_queue(pt, queue_nr);
781 if (!ptq)
782 return -ENOMEM;
783 queue->priv = ptq;
784
785 if (queue->cpu != -1)
786 ptq->cpu = queue->cpu;
787 ptq->tid = queue->tid;
788
789 if (pt->sampling_mode) {
790 if (pt->timeless_decoding)
791 ptq->step_through_buffers = true;
792 if (pt->timeless_decoding || !pt->have_sched_switch)
793 ptq->use_buffer_pid_tid = true;
794 }
795 }
796
797 if (!ptq->on_heap &&
798 (!pt->sync_switch ||
799 ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
800 const struct intel_pt_state *state;
801 int ret;
802
803 if (pt->timeless_decoding)
804 return 0;
805
806 intel_pt_log("queue %u getting timestamp\n", queue_nr);
807 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
808 queue_nr, ptq->cpu, ptq->pid, ptq->tid);
809 while (1) {
810 state = intel_pt_decode(ptq->decoder);
811 if (state->err) {
812 if (state->err == INTEL_PT_ERR_NODATA) {
813 intel_pt_log("queue %u has no timestamp\n",
814 queue_nr);
815 return 0;
816 }
817 continue;
818 }
819 if (state->timestamp)
820 break;
821 }
822
823 ptq->timestamp = state->timestamp;
824 intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
825 queue_nr, ptq->timestamp);
826 ptq->state = state;
827 ptq->have_sample = true;
828 intel_pt_sample_flags(ptq);
829 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
830 if (ret)
831 return ret;
832 ptq->on_heap = true;
833 }
834
835 return 0;
836}
837
838static int intel_pt_setup_queues(struct intel_pt *pt)
839{
840 unsigned int i;
841 int ret;
842
843 for (i = 0; i < pt->queues.nr_queues; i++) {
844 ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
845 if (ret)
846 return ret;
847 }
848 return 0;
849}
850
851static int intel_pt_inject_event(union perf_event *event,
852 struct perf_sample *sample, u64 type,
853 bool swapped)
854{
855 event->header.size = perf_event__sample_event_size(sample, type, 0);
856 return perf_event__synthesize_sample(event, type, 0, sample, swapped);
857}
858
859static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
860{
861 int ret;
862 struct intel_pt *pt = ptq->pt;
863 union perf_event *event = ptq->event_buf;
864 struct perf_sample sample = { .ip = 0, };
865
866 event->sample.header.type = PERF_RECORD_SAMPLE;
867 event->sample.header.misc = PERF_RECORD_MISC_USER;
868 event->sample.header.size = sizeof(struct perf_event_header);
869
870 if (!pt->timeless_decoding)
871 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
872
873 sample.ip = ptq->state->from_ip;
874 sample.pid = ptq->pid;
875 sample.tid = ptq->tid;
876 sample.addr = ptq->state->to_ip;
877 sample.id = ptq->pt->branches_id;
878 sample.stream_id = ptq->pt->branches_id;
879 sample.period = 1;
880 sample.cpu = ptq->cpu;
881 sample.flags = ptq->flags;
882 sample.insn_len = ptq->insn_len;
883
884 if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
885 return 0;
886
887 if (pt->synth_opts.inject) {
888 ret = intel_pt_inject_event(event, &sample,
889 pt->branches_sample_type,
890 pt->synth_needs_swap);
891 if (ret)
892 return ret;
893 }
894
895 ret = perf_session__deliver_synth_event(pt->session, event, &sample);
896 if (ret)
897 pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
898 ret);
899
900 return ret;
901}
902
903static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
904{
905 int ret;
906 struct intel_pt *pt = ptq->pt;
907 union perf_event *event = ptq->event_buf;
908 struct perf_sample sample = { .ip = 0, };
909
910 event->sample.header.type = PERF_RECORD_SAMPLE;
911 event->sample.header.misc = PERF_RECORD_MISC_USER;
912 event->sample.header.size = sizeof(struct perf_event_header);
913
914 if (!pt->timeless_decoding)
915 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
916
917 sample.ip = ptq->state->from_ip;
918 sample.pid = ptq->pid;
919 sample.tid = ptq->tid;
920 sample.addr = ptq->state->to_ip;
921 sample.id = ptq->pt->instructions_id;
922 sample.stream_id = ptq->pt->instructions_id;
923 sample.period = ptq->pt->instructions_sample_period;
924 sample.cpu = ptq->cpu;
925 sample.flags = ptq->flags;
926 sample.insn_len = ptq->insn_len;
927
928 if (pt->synth_opts.callchain) {
929 thread_stack__sample(ptq->thread, ptq->chain,
930 pt->synth_opts.callchain_sz, sample.ip);
931 sample.callchain = ptq->chain;
932 }
933
934 if (pt->synth_opts.inject) {
935 ret = intel_pt_inject_event(event, &sample,
936 pt->instructions_sample_type,
937 pt->synth_needs_swap);
938 if (ret)
939 return ret;
940 }
941
942 ret = perf_session__deliver_synth_event(pt->session, event, &sample);
943 if (ret)
944 pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
945 ret);
946
947 return ret;
948}
949
950static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
951{
952 int ret;
953 struct intel_pt *pt = ptq->pt;
954 union perf_event *event = ptq->event_buf;
955 struct perf_sample sample = { .ip = 0, };
956
957 event->sample.header.type = PERF_RECORD_SAMPLE;
958 event->sample.header.misc = PERF_RECORD_MISC_USER;
959 event->sample.header.size = sizeof(struct perf_event_header);
960
961 if (!pt->timeless_decoding)
962 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
963
964 sample.ip = ptq->state->from_ip;
965 sample.pid = ptq->pid;
966 sample.tid = ptq->tid;
967 sample.addr = ptq->state->to_ip;
968 sample.id = ptq->pt->transactions_id;
969 sample.stream_id = ptq->pt->transactions_id;
970 sample.period = 1;
971 sample.cpu = ptq->cpu;
972 sample.flags = ptq->flags;
973 sample.insn_len = ptq->insn_len;
974
975 if (pt->synth_opts.callchain) {
976 thread_stack__sample(ptq->thread, ptq->chain,
977 pt->synth_opts.callchain_sz, sample.ip);
978 sample.callchain = ptq->chain;
979 }
980
981 if (pt->synth_opts.inject) {
982 ret = intel_pt_inject_event(event, &sample,
983 pt->transactions_sample_type,
984 pt->synth_needs_swap);
985 if (ret)
986 return ret;
987 }
988
989 ret = perf_session__deliver_synth_event(pt->session, event, &sample);
990 if (ret)
991 pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
992 ret);
993
994 return ret;
995}
996
997static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
998 pid_t pid, pid_t tid, u64 ip)
999{
1000 union perf_event event;
1001 char msg[MAX_AUXTRACE_ERROR_MSG];
1002 int err;
1003
1004 intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1005
1006 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1007 code, cpu, pid, tid, ip, msg);
1008
1009 err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1010 if (err)
1011 pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1012 err);
1013
1014 return err;
1015}
1016
1017static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1018{
1019 struct auxtrace_queue *queue;
1020 pid_t tid = ptq->next_tid;
1021 int err;
1022
1023 if (tid == -1)
1024 return 0;
1025
1026 intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1027
1028 err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1029
1030 queue = &pt->queues.queue_array[ptq->queue_nr];
1031 intel_pt_set_pid_tid_cpu(pt, queue);
1032
1033 ptq->next_tid = -1;
1034
1035 return err;
1036}
1037
1038static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1039{
1040 struct intel_pt *pt = ptq->pt;
1041
1042 return ip == pt->switch_ip &&
1043 (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1044 !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1045 PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1046}
1047
1048static int intel_pt_sample(struct intel_pt_queue *ptq)
1049{
1050 const struct intel_pt_state *state = ptq->state;
1051 struct intel_pt *pt = ptq->pt;
1052 int err;
1053
1054 if (!ptq->have_sample)
1055 return 0;
1056
1057 ptq->have_sample = false;
1058
1059 if (pt->sample_instructions &&
1060 (state->type & INTEL_PT_INSTRUCTION)) {
1061 err = intel_pt_synth_instruction_sample(ptq);
1062 if (err)
1063 return err;
1064 }
1065
1066 if (pt->sample_transactions &&
1067 (state->type & INTEL_PT_TRANSACTION)) {
1068 err = intel_pt_synth_transaction_sample(ptq);
1069 if (err)
1070 return err;
1071 }
1072
1073 if (!(state->type & INTEL_PT_BRANCH))
1074 return 0;
1075
1076 if (pt->synth_opts.callchain)
1077 thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
1078 state->to_ip, ptq->insn_len,
1079 state->trace_nr);
1080 else
1081 thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
1082
1083 if (pt->sample_branches) {
1084 err = intel_pt_synth_branch_sample(ptq);
1085 if (err)
1086 return err;
1087 }
1088
1089 if (!pt->sync_switch)
1090 return 0;
1091
1092 if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1093 switch (ptq->switch_state) {
1094 case INTEL_PT_SS_UNKNOWN:
1095 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1096 err = intel_pt_next_tid(pt, ptq);
1097 if (err)
1098 return err;
1099 ptq->switch_state = INTEL_PT_SS_TRACING;
1100 break;
1101 default:
1102 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
1103 return 1;
1104 }
1105 } else if (!state->to_ip) {
1106 ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1107 } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
1108 ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1109 } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1110 state->to_ip == pt->ptss_ip &&
1111 (ptq->flags & PERF_IP_FLAG_CALL)) {
1112 ptq->switch_state = INTEL_PT_SS_TRACING;
1113 }
1114
1115 return 0;
1116}
1117
1118static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip)
1119{
1120 struct map *map;
1121 struct symbol *sym, *start;
1122 u64 ip, switch_ip = 0;
1123
1124 if (ptss_ip)
1125 *ptss_ip = 0;
1126
1127 map = machine__kernel_map(machine, MAP__FUNCTION);
1128 if (!map)
1129 return 0;
1130
1131 if (map__load(map, machine->symbol_filter))
1132 return 0;
1133
1134 start = dso__first_symbol(map->dso, MAP__FUNCTION);
1135
1136 for (sym = start; sym; sym = dso__next_symbol(sym)) {
1137 if (sym->binding == STB_GLOBAL &&
1138 !strcmp(sym->name, "__switch_to")) {
1139 ip = map->unmap_ip(map, sym->start);
1140 if (ip >= map->start && ip < map->end) {
1141 switch_ip = ip;
1142 break;
1143 }
1144 }
1145 }
1146
1147 if (!switch_ip || !ptss_ip)
1148 return 0;
1149
1150 for (sym = start; sym; sym = dso__next_symbol(sym)) {
1151 if (!strcmp(sym->name, "perf_trace_sched_switch")) {
1152 ip = map->unmap_ip(map, sym->start);
1153 if (ip >= map->start && ip < map->end) {
1154 *ptss_ip = ip;
1155 break;
1156 }
1157 }
1158 }
1159
1160 return switch_ip;
1161}
1162
1163static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1164{
1165 const struct intel_pt_state *state = ptq->state;
1166 struct intel_pt *pt = ptq->pt;
1167 int err;
1168
1169 if (!pt->kernel_start) {
1170 pt->kernel_start = machine__kernel_start(pt->machine);
1171 if (pt->per_cpu_mmaps && pt->have_sched_switch &&
1172 !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
1173 !pt->sampling_mode) {
1174 pt->switch_ip = intel_pt_switch_ip(pt->machine,
1175 &pt->ptss_ip);
1176 if (pt->switch_ip) {
1177 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
1178 pt->switch_ip, pt->ptss_ip);
1179 pt->sync_switch = true;
1180 }
1181 }
1182 }
1183
1184 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1185 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1186 while (1) {
1187 err = intel_pt_sample(ptq);
1188 if (err)
1189 return err;
1190
1191 state = intel_pt_decode(ptq->decoder);
1192 if (state->err) {
1193 if (state->err == INTEL_PT_ERR_NODATA)
1194 return 1;
1195 if (pt->sync_switch &&
1196 state->from_ip >= pt->kernel_start) {
1197 pt->sync_switch = false;
1198 intel_pt_next_tid(pt, ptq);
1199 }
1200 if (pt->synth_opts.errors) {
1201 err = intel_pt_synth_error(pt, state->err,
1202 ptq->cpu, ptq->pid,
1203 ptq->tid,
1204 state->from_ip);
1205 if (err)
1206 return err;
1207 }
1208 continue;
1209 }
1210
1211 ptq->state = state;
1212 ptq->have_sample = true;
1213 intel_pt_sample_flags(ptq);
1214
1215 /* Use estimated TSC upon return to user space */
1216 if (pt->est_tsc &&
1217 (state->from_ip >= pt->kernel_start || !state->from_ip) &&
1218 state->to_ip && state->to_ip < pt->kernel_start) {
1219 intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1220 state->timestamp, state->est_timestamp);
1221 ptq->timestamp = state->est_timestamp;
1222 /* Use estimated TSC in unknown switch state */
1223 } else if (pt->sync_switch &&
1224 ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1225 intel_pt_is_switch_ip(ptq, state->to_ip) &&
1226 ptq->next_tid == -1) {
1227 intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1228 state->timestamp, state->est_timestamp);
1229 ptq->timestamp = state->est_timestamp;
1230 } else if (state->timestamp > ptq->timestamp) {
1231 ptq->timestamp = state->timestamp;
1232 }
1233
1234 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1235 *timestamp = ptq->timestamp;
1236 return 0;
1237 }
1238 }
1239 return 0;
1240}
1241
1242static inline int intel_pt_update_queues(struct intel_pt *pt)
1243{
1244 if (pt->queues.new_data) {
1245 pt->queues.new_data = false;
1246 return intel_pt_setup_queues(pt);
1247 }
1248 return 0;
1249}
1250
1251static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1252{
1253 unsigned int queue_nr;
1254 u64 ts;
1255 int ret;
1256
1257 while (1) {
1258 struct auxtrace_queue *queue;
1259 struct intel_pt_queue *ptq;
1260
1261 if (!pt->heap.heap_cnt)
1262 return 0;
1263
1264 if (pt->heap.heap_array[0].ordinal >= timestamp)
1265 return 0;
1266
1267 queue_nr = pt->heap.heap_array[0].queue_nr;
1268 queue = &pt->queues.queue_array[queue_nr];
1269 ptq = queue->priv;
1270
1271 intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1272 queue_nr, pt->heap.heap_array[0].ordinal,
1273 timestamp);
1274
1275 auxtrace_heap__pop(&pt->heap);
1276
1277 if (pt->heap.heap_cnt) {
1278 ts = pt->heap.heap_array[0].ordinal + 1;
1279 if (ts > timestamp)
1280 ts = timestamp;
1281 } else {
1282 ts = timestamp;
1283 }
1284
1285 intel_pt_set_pid_tid_cpu(pt, queue);
1286
1287 ret = intel_pt_run_decoder(ptq, &ts);
1288
1289 if (ret < 0) {
1290 auxtrace_heap__add(&pt->heap, queue_nr, ts);
1291 return ret;
1292 }
1293
1294 if (!ret) {
1295 ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
1296 if (ret < 0)
1297 return ret;
1298 } else {
1299 ptq->on_heap = false;
1300 }
1301 }
1302
1303 return 0;
1304}
1305
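/*
 * Without usable timestamps, decode each matching queue to completion for
 * the given thread (tid == -1 matches all queues).
 */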
1306static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
1307 u64 time_)
1308{
1309 struct auxtrace_queues *queues = &pt->queues;
1310 unsigned int i;
1311 u64 ts = 0;
1312
1313 for (i = 0; i < queues->nr_queues; i++) {
1314 struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1315 struct intel_pt_queue *ptq = queue->priv;
1316
1317 if (ptq && (tid == -1 || ptq->tid == tid)) {
1318 ptq->time = time_;
1319 intel_pt_set_pid_tid_cpu(pt, queue);
1320 intel_pt_run_decoder(ptq, &ts);
1321 }
1322 }
1323 return 0;
1324}
1325
1326static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
1327{
1328 return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1329 sample->pid, sample->tid, 0);
1330}
1331
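/*
 * Find the queue for a CPU: try the slot matching the CPU number first,
 * then search downwards and finally upwards through the queue array.
 */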
1332static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
1333{
1334 unsigned i, j;
1335
1336 if (cpu < 0 || !pt->queues.nr_queues)
1337 return NULL;
1338
1339 if ((unsigned)cpu >= pt->queues.nr_queues)
1340 i = pt->queues.nr_queues - 1;
1341 else
1342 i = cpu;
1343
1344 if (pt->queues.queue_array[i].cpu == cpu)
1345 return pt->queues.queue_array[i].priv;
1346
1347 for (j = 0; i > 0; j++) {
1348 if (pt->queues.queue_array[--i].cpu == cpu)
1349 return pt->queues.queue_array[i].priv;
1350 }
1351
1352 for (; j < pt->queues.nr_queues; j++) {
1353 if (pt->queues.queue_array[j].cpu == cpu)
1354 return pt->queues.queue_array[j].priv;
1355 }
1356
1357 return NULL;
1358}
1359
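/*
 * Handle a sched_switch sample: record the incoming tid for the CPU and,
 * if sync_switch is in use, advance the queue's switch state machine.
 */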
1360static int intel_pt_process_switch(struct intel_pt *pt,
1361 struct perf_sample *sample)
1362{
1363 struct intel_pt_queue *ptq;
1364 struct perf_evsel *evsel;
1365 pid_t tid;
1366 int cpu, err;
1367
1368 evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
1369 if (evsel != pt->switch_evsel)
1370 return 0;
1371
1372 tid = perf_evsel__intval(evsel, sample, "next_pid");
1373 cpu = sample->cpu;
1374
1375 intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1376 cpu, tid, sample->time, perf_time_to_tsc(sample->time,
1377 &pt->tc));
1378
1379 if (!pt->sync_switch)
1380 goto out;
1381
1382 ptq = intel_pt_cpu_to_ptq(pt, cpu);
1383 if (!ptq)
1384 goto out;
1385
1386 switch (ptq->switch_state) {
1387 case INTEL_PT_SS_NOT_TRACING:
1388 ptq->next_tid = -1;
1389 break;
1390 case INTEL_PT_SS_UNKNOWN:
1391 case INTEL_PT_SS_TRACING:
1392 ptq->next_tid = tid;
1393 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
1394 return 0;
1395 case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
1396 if (!ptq->on_heap) {
1397 ptq->timestamp = perf_time_to_tsc(sample->time,
1398 &pt->tc);
1399 err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
1400 ptq->timestamp);
1401 if (err)
1402 return err;
1403 ptq->on_heap = true;
1404 }
1405 ptq->switch_state = INTEL_PT_SS_TRACING;
1406 break;
1407 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1408 ptq->next_tid = tid;
1409 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1410 break;
1411 default:
1412 break;
1413 }
1414out:
1415 return machine__set_current_tid(pt->machine, cpu, -1, tid);
1416}
1417
1418static int intel_pt_process_itrace_start(struct intel_pt *pt,
1419 union perf_event *event,
1420 struct perf_sample *sample)
1421{
1422 if (!pt->per_cpu_mmaps)
1423 return 0;
1424
1425 intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1426 sample->cpu, event->itrace_start.pid,
1427 event->itrace_start.tid, sample->time,
1428 perf_time_to_tsc(sample->time, &pt->tc));
1429
1430 return machine__set_current_tid(pt->machine, sample->cpu,
1431 event->itrace_start.pid,
1432 event->itrace_start.tid);
1433}
1434
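/*
 * Main per-event callback: decode trace up to the timestamp of each ordered
 * event so that synthesized samples are delivered in time order with the
 * rest of the session.
 */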
1435static int intel_pt_process_event(struct perf_session *session,
1436 union perf_event *event,
1437 struct perf_sample *sample,
1438 struct perf_tool *tool)
1439{
1440 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1441 auxtrace);
1442 u64 timestamp;
1443 int err = 0;
1444
1445 if (dump_trace)
1446 return 0;
1447
1448 if (!tool->ordered_events) {
1449 pr_err("Intel Processor Trace requires ordered events\n");
1450 return -EINVAL;
1451 }
1452
1453 if (sample->time)
1454 timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1455 else
1456 timestamp = 0;
1457
1458 if (timestamp || pt->timeless_decoding) {
1459 err = intel_pt_update_queues(pt);
1460 if (err)
1461 return err;
1462 }
1463
1464 if (pt->timeless_decoding) {
1465 if (event->header.type == PERF_RECORD_EXIT) {
1466 err = intel_pt_process_timeless_queues(pt,
1467 event->fork.tid,
1468 sample->time);
1469 }
1470 } else if (timestamp) {
1471 err = intel_pt_process_queues(pt, timestamp);
1472 }
1473 if (err)
1474 return err;
1475
1476 if (event->header.type == PERF_RECORD_AUX &&
1477 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
1478 pt->synth_opts.errors) {
1479 err = intel_pt_lost(pt, sample);
1480 if (err)
1481 return err;
1482 }
1483
1484 if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
1485 err = intel_pt_process_switch(pt, sample);
1486 else if (event->header.type == PERF_RECORD_ITRACE_START)
1487 err = intel_pt_process_itrace_start(pt, event, sample);
1488
1489 intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
1490 perf_event__name(event->header.type), event->header.type,
1491 sample->cpu, sample->time, timestamp);
1492
1493 return err;
1494}
1495
1496static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
1497{
1498 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1499 auxtrace);
1500 int ret;
1501
1502 if (dump_trace)
1503 return 0;
1504
1505 if (!tool->ordered_events)
1506 return -EINVAL;
1507
1508 ret = intel_pt_update_queues(pt);
1509 if (ret < 0)
1510 return ret;
1511
1512 if (pt->timeless_decoding)
1513 return intel_pt_process_timeless_queues(pt, -1,
1514 MAX_TIMESTAMP - 1);
1515
1516 return intel_pt_process_queues(pt, MAX_TIMESTAMP);
1517}
1518
1519static void intel_pt_free_events(struct perf_session *session)
1520{
1521 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1522 auxtrace);
1523 struct auxtrace_queues *queues = &pt->queues;
1524 unsigned int i;
1525
1526 for (i = 0; i < queues->nr_queues; i++) {
1527 intel_pt_free_queue(queues->queue_array[i].priv);
1528 queues->queue_array[i].priv = NULL;
1529 }
1530 intel_pt_log_disable();
1531 auxtrace_queues__free(queues);
1532}
1533
1534static void intel_pt_free(struct perf_session *session)
1535{
1536 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1537 auxtrace);
1538
1539 auxtrace_heap__free(&pt->heap);
1540 intel_pt_free_events(session);
1541 session->auxtrace = NULL;
1542 thread__delete(pt->unknown_thread);
1543 free(pt);
1544}
1545
1546static int intel_pt_process_auxtrace_event(struct perf_session *session,
1547 union perf_event *event,
1548 struct perf_tool *tool __maybe_unused)
1549{
1550 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1551 auxtrace);
1552
1553 if (pt->sampling_mode)
1554 return 0;
1555
1556 if (!pt->data_queued) {
1557 struct auxtrace_buffer *buffer;
1558 off_t data_offset;
1559 int fd = perf_data_file__fd(session->file);
1560 int err;
1561
1562 if (perf_data_file__is_pipe(session->file)) {
1563 data_offset = 0;
1564 } else {
1565 data_offset = lseek(fd, 0, SEEK_CUR);
1566 if (data_offset == -1)
1567 return -errno;
1568 }
1569
1570 err = auxtrace_queues__add_event(&pt->queues, session, event,
1571 data_offset, &buffer);
1572 if (err)
1573 return err;
1574
1575 /* Dump here now that we have copied a piped trace out of the pipe */
1576 if (dump_trace) {
1577 if (auxtrace_buffer__get_data(buffer, fd)) {
1578 intel_pt_dump_event(pt, buffer->data,
1579 buffer->size);
1580 auxtrace_buffer__put_data(buffer);
1581 }
1582 }
1583 }
1584
1585 return 0;
1586}
1587
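/*
 * A dummy tool is used so that perf_event__synthesize_attr() delivers the
 * synthesized attribute event back into this session.
 */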
1588struct intel_pt_synth {
1589 struct perf_tool dummy_tool;
1590 struct perf_session *session;
1591};
1592
1593static int intel_pt_event_synth(struct perf_tool *tool,
1594 union perf_event *event,
1595 struct perf_sample *sample __maybe_unused,
1596 struct machine *machine __maybe_unused)
1597{
1598 struct intel_pt_synth *intel_pt_synth =
1599 container_of(tool, struct intel_pt_synth, dummy_tool);
1600
1601 return perf_session__deliver_synth_event(intel_pt_synth->session, event,
1602 NULL);
1603}
1604
1605static int intel_pt_synth_event(struct perf_session *session,
1606 struct perf_event_attr *attr, u64 id)
1607{
1608 struct intel_pt_synth intel_pt_synth;
1609
1610 memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
1611 intel_pt_synth.session = session;
1612
1613 return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
1614 &id, intel_pt_event_synth);
1615}
1616
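/*
 * Build the perf_event_attr used for synthesized 'instructions',
 * 'transactions' and 'branches' samples, based on the selected Intel PT
 * event, and add corresponding attribute events to the session.
 */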
1617static int intel_pt_synth_events(struct intel_pt *pt,
1618 struct perf_session *session)
1619{
1620 struct perf_evlist *evlist = session->evlist;
1621 struct perf_evsel *evsel;
1622 struct perf_event_attr attr;
1623 bool found = false;
1624 u64 id;
1625 int err;
1626
1627 evlist__for_each(evlist, evsel) {
1628 if (evsel->attr.type == pt->pmu_type && evsel->ids) {
1629 found = true;
1630 break;
1631 }
1632 }
1633
1634 if (!found) {
1635 pr_debug("There are no selected events with Intel Processor Trace data\n");
1636 return 0;
1637 }
1638
1639 memset(&attr, 0, sizeof(struct perf_event_attr));
1640 attr.size = sizeof(struct perf_event_attr);
1641 attr.type = PERF_TYPE_HARDWARE;
1642 attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
1643 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1644 PERF_SAMPLE_PERIOD;
1645 if (pt->timeless_decoding)
1646 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1647 else
1648 attr.sample_type |= PERF_SAMPLE_TIME;
1649 if (!pt->per_cpu_mmaps)
1650 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
1651 attr.exclude_user = evsel->attr.exclude_user;
1652 attr.exclude_kernel = evsel->attr.exclude_kernel;
1653 attr.exclude_hv = evsel->attr.exclude_hv;
1654 attr.exclude_host = evsel->attr.exclude_host;
1655 attr.exclude_guest = evsel->attr.exclude_guest;
1656 attr.sample_id_all = evsel->attr.sample_id_all;
1657 attr.read_format = evsel->attr.read_format;
1658
1659 id = evsel->id[0] + 1000000000;
1660 if (!id)
1661 id = 1;
1662
1663 if (pt->synth_opts.instructions) {
1664 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1665 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
1666 attr.sample_period =
1667 intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
1668 else
1669 attr.sample_period = pt->synth_opts.period;
1670 pt->instructions_sample_period = attr.sample_period;
1671 if (pt->synth_opts.callchain)
1672 attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1673 pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1674 id, (u64)attr.sample_type);
1675 err = intel_pt_synth_event(session, &attr, id);
1676 if (err) {
1677 pr_err("%s: failed to synthesize 'instructions' event type\n",
1678 __func__);
1679 return err;
1680 }
1681 pt->sample_instructions = true;
1682 pt->instructions_sample_type = attr.sample_type;
1683 pt->instructions_id = id;
1684 id += 1;
1685 }
1686
1687 if (pt->synth_opts.transactions) {
1688 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1689 attr.sample_period = 1;
1690 if (pt->synth_opts.callchain)
1691 attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1692 pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1693 id, (u64)attr.sample_type);
1694 err = intel_pt_synth_event(session, &attr, id);
1695 if (err) {
1696 pr_err("%s: failed to synthesize 'transactions' event type\n",
1697 __func__);
1698 return err;
1699 }
1700 pt->sample_transactions = true;
1701 pt->transactions_id = id;
1702 id += 1;
1703 evlist__for_each(evlist, evsel) {
1704 if (evsel->id && evsel->id[0] == pt->transactions_id) {
1705 if (evsel->name)
1706 zfree(&evsel->name);
1707 evsel->name = strdup("transactions");
1708 break;
1709 }
1710 }
1711 }
1712
1713 if (pt->synth_opts.branches) {
1714 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1715 attr.sample_period = 1;
1716 attr.sample_type |= PERF_SAMPLE_ADDR;
1717 attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
1718 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1719 id, (u64)attr.sample_type);
1720 err = intel_pt_synth_event(session, &attr, id);
1721 if (err) {
1722 pr_err("%s: failed to synthesize 'branches' event type\n",
1723 __func__);
1724 return err;
1725 }
1726 pt->sample_branches = true;
1727 pt->branches_sample_type = attr.sample_type;
1728 pt->branches_id = id;
1729 }
1730
1731 pt->synth_needs_swap = evsel->needs_swap;
1732
1733 return 0;
1734}
1735
1736static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
1737{
1738 struct perf_evsel *evsel;
1739
1740 evlist__for_each_reverse(evlist, evsel) {
1741 const char *name = perf_evsel__name(evsel);
1742
1743 if (!strcmp(name, "sched:sched_switch"))
1744 return evsel;
1745 }
1746
1747 return NULL;
1748}
1749
1750static const char * const intel_pt_info_fmts[] = {
1751 [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
1752 [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
1753 [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n",
1754 [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
1755 [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
1756 [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
1757 [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n",
1758 [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n",
1759 [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
1760 [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
1761};
1762
1763static void intel_pt_print_info(u64 *arr, int start, int finish)
1764{
1765 int i;
1766
1767 if (!dump_trace)
1768 return;
1769
1770 for (i = start; i <= finish; i++)
1771 fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
1772}
1773
1774int intel_pt_process_auxtrace_info(union perf_event *event,
1775 struct perf_session *session)
1776{
1777 struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
1778 size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
1779 struct intel_pt *pt;
1780 int err;
1781
1782 if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
1783 min_sz)
1784 return -EINVAL;
1785
1786 pt = zalloc(sizeof(struct intel_pt));
1787 if (!pt)
1788 return -ENOMEM;
1789
1790 err = auxtrace_queues__init(&pt->queues);
1791 if (err)
1792 goto err_free;
1793
1794 intel_pt_log_set_name(INTEL_PT_PMU_NAME);
1795
1796 pt->session = session;
1797 pt->machine = &session->machines.host; /* No kvm support */
1798 pt->auxtrace_type = auxtrace_info->type;
1799 pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
1800 pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
1801 pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
1802 pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
1803 pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
1804 pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
1805 pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
1806 pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
1807 pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
1808 pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
1809 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
1810 INTEL_PT_PER_CPU_MMAPS);
1811
1812 pt->timeless_decoding = intel_pt_timeless_decoding(pt);
1813 pt->have_tsc = intel_pt_have_tsc(pt);
1814 pt->sampling_mode = false;
1815 pt->est_tsc = !pt->timeless_decoding;
1816
1817 pt->unknown_thread = thread__new(999999999, 999999999);
1818 if (!pt->unknown_thread) {
1819 err = -ENOMEM;
1820 goto err_free_queues;
1821 }
1822 err = thread__set_comm(pt->unknown_thread, "unknown", 0);
1823 if (err)
1824 goto err_delete_thread;
1825 if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
1826 err = -ENOMEM;
1827 goto err_delete_thread;
1828 }
1829
1830 pt->auxtrace.process_event = intel_pt_process_event;
1831 pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
1832 pt->auxtrace.flush_events = intel_pt_flush;
1833 pt->auxtrace.free_events = intel_pt_free_events;
1834 pt->auxtrace.free = intel_pt_free;
1835 session->auxtrace = &pt->auxtrace;
1836
1837 if (dump_trace)
1838 return 0;
1839
1840 if (pt->have_sched_switch == 1) {
1841 pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
1842 if (!pt->switch_evsel) {
1843 pr_err("%s: missing sched_switch event\n", __func__);
1844 goto err_delete_thread;
1845 }
1846 }
1847
1848 if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
1849 pt->synth_opts = *session->itrace_synth_opts;
1850 } else {
1851 itrace_synth_opts__set_default(&pt->synth_opts);
1852 if (use_browser != -1) {
1853 pt->synth_opts.branches = false;
1854 pt->synth_opts.callchain = true;
1855 }
1856 }
1857
1858 if (pt->synth_opts.log)
1859 intel_pt_log_enable();
1860
1861 /* Maximum non-turbo ratio is TSC freq / 100 MHz */
1862 if (pt->tc.time_mult) {
1863 u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
1864
1865 pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
1866 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
1867 intel_pt_log("Maximum non-turbo ratio %u\n",
1868 pt->max_non_turbo_ratio);
1869 }
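	/*
	 * Worked example (illustrative numbers only): a 3.4 GHz TSC gives
	 * tsc_freq = 3,400,000,000, so the calculation above yields
	 * (3,400,000,000 + 50,000,000) / 100,000,000 = 34 (integer division).
	 */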
1870
1871 if (pt->synth_opts.calls)
1872 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
1873 PERF_IP_FLAG_TRACE_END;
1874 if (pt->synth_opts.returns)
1875 pt->branches_filter |= PERF_IP_FLAG_RETURN |
1876 PERF_IP_FLAG_TRACE_BEGIN;
1877
1878 if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
1879 symbol_conf.use_callchain = true;
1880 if (callchain_register_param(&callchain_param) < 0) {
1881 symbol_conf.use_callchain = false;
1882 pt->synth_opts.callchain = false;
1883 }
1884 }
1885
1886 err = intel_pt_synth_events(pt, session);
1887 if (err)
1888 goto err_delete_thread;
1889
1890 err = auxtrace_queues__process_index(&pt->queues, session);
1891 if (err)
1892 goto err_delete_thread;
1893
1894 if (pt->queues.populated)
1895 pt->data_queued = true;
1896
1897 if (pt->timeless_decoding)
1898 pr_debug2("Intel PT decoding without timestamps\n");
1899
1900 return 0;
1901
1902err_delete_thread:
1903 thread__delete(pt->unknown_thread);
1904err_free_queues:
1905 intel_pt_log_disable();
1906 auxtrace_queues__free(&pt->queues);
1907 session->auxtrace = NULL;
1908err_free:
1909 free(pt);
1910 return err;
1911}
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
new file mode 100644
index 000000000000..a1bfe93473ba
--- /dev/null
+++ b/tools/perf/util/intel-pt.h
@@ -0,0 +1,51 @@
1/*
2 * intel_pt.h: Intel Processor Trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef INCLUDE__PERF_INTEL_PT_H__
17#define INCLUDE__PERF_INTEL_PT_H__
18
19#define INTEL_PT_PMU_NAME "intel_pt"
20
21enum {
22 INTEL_PT_PMU_TYPE,
23 INTEL_PT_TIME_SHIFT,
24 INTEL_PT_TIME_MULT,
25 INTEL_PT_TIME_ZERO,
26 INTEL_PT_CAP_USER_TIME_ZERO,
27 INTEL_PT_TSC_BIT,
28 INTEL_PT_NORETCOMP_BIT,
29 INTEL_PT_HAVE_SCHED_SWITCH,
30 INTEL_PT_SNAPSHOT_MODE,
31 INTEL_PT_PER_CPU_MMAPS,
32 INTEL_PT_AUXTRACE_PRIV_MAX,
33};
34
35#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64))
36
37struct auxtrace_record;
38struct perf_tool;
39union perf_event;
40struct perf_session;
41struct perf_event_attr;
42struct perf_pmu;
43
44struct auxtrace_record *intel_pt_recording_init(int *err);
45
46int intel_pt_process_auxtrace_info(union perf_event *event,
47 struct perf_session *session);
48
49struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
50
51#endif
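For context, a minimal sketch of how the session side routes an auxtrace info event to this decoder. This is not part of the patch: perf's real dispatcher lives in tools/perf/util/auxtrace.c, the function name below is illustrative, and it assumes the PERF_AUXTRACE_INTEL_PT value from perf's auxtrace type enumeration together with the declarations in intel-pt.h above.

#include <errno.h>
#include "util/event.h"
#include "util/session.h"
#include "util/auxtrace.h"
#include "util/intel-pt.h"

/* Illustrative only: route an auxtrace info event by its recorded type. */
static int example_process_auxtrace_info(union perf_event *event,
					 struct perf_session *session)
{
	switch (event->auxtrace_info.type) {
	case PERF_AUXTRACE_INTEL_PT:
		return intel_pt_process_auxtrace_info(event, session);
	default:
		return -EINVAL;
	}
}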