author    Linus Torvalds <torvalds@linux-foundation.org>    2015-08-31 22:49:05 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2015-08-31 22:49:05 -0400
commit    41d859a83c567a9c9f50a34082cc64aab0abb0cd (patch)
tree      ab911ea521701401413d041e1b92225f3dbdab41
parent    4658000955d1864b54890214434e171949c7f1c5 (diff)
parent    bac2e4a96d1c0bcce5e9654dcc902f75576b9b03 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:

 "Main perf kernel side changes:

   - uprobes updates/fixes. (Oleg Nesterov)

   - Add PERF_RECORD_SWITCH to indicate context switches and use it in
     tooling. (Adrian Hunter)

   - Support BPF programs attached to uprobes and first steps for BPF
     tooling support. (Wang Nan)

   - generic x86 MSR-to-perf PMU driver. (Andy Lutomirski)

   - x86 Intel PT, LBR and BTS updates. (Alexander Shishkin)

   - x86 Intel Skylake support. (Andi Kleen)

   - x86 Intel Knights Landing (KNL) RAPL support. (Dasaratharaman
     Chandramouli)

   - x86 Intel Broadwell-DE uncore support. (Kan Liang)

   - x86 hw breakpoints robustization. (Andy Lutomirski)

  Main perf tooling side changes:

   - Support Intel PT in several tools, enabling the use of the
     processor trace feature introduced in Intel Broadwell processors.
     (Adrian Hunter)

        # dmesg | grep Performance
        # [0.188477] Performance Events: PEBS fmt2+, 16-deep LBR, Broadwell events, full-width counters, Intel PMU driver.
        # perf record -e intel_pt//u -a sleep 1
        [ perf record: Woken up 1 times to write data ]
        [ perf record: Captured and wrote 0.216 MB perf.data ]
        # perf script
        (then navigate in the tool output to some area, like this one:)
        184 1030 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba661440 dl_main (/usr/lib64/ld-2.17.so)
        185 1457 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba669f10 _dl_new_object (/usr/lib64/ld-2.17.so)
        186 9f37 _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba677b90 strlen (/usr/lib64/ld-2.17.so)
        187 7ba3 strlen (/usr/lib64/ld-2.17.so) => 7f21ba677c75 strlen (/usr/lib64/ld-2.17.so)
        188 7c78 strlen (/usr/lib64/ld-2.17.so) => 7f21ba669f3c _dl_new_object (/usr/lib64/ld-2.17.so)
        189 9f8a _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba65fab0 calloc@plt (/usr/lib64/ld-2.17.so)
        190 fab0 calloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e70 calloc (/usr/lib64/ld-2.17.so)
        191 5e87 calloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa90 malloc@plt (/usr/lib64/ld-2.17.so)
        192 fa90 malloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e60 malloc (/usr/lib64/ld-2.17.so)
        193 5e68 malloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so)
        194 fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so) => 7f21ba675d50 __libc_memalign (/usr/lib64/ld-2.17.so)
        195 5d63 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e20 __libc_memalign (/usr/lib64/ld-2.17.so)
        196 5e40 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675d73 __libc_memalign (/usr/lib64/ld-2.17.so)
        197 5d97 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e18 __libc_memalign (/usr/lib64/ld-2.17.so)
        198 5e1e __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675df9 __libc_memalign (/usr/lib64/ld-2.17.so)
        199 5e10 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba669f8f _dl_new_object (/usr/lib64/ld-2.17.so)
        200 9fc2 _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba678e70 memcpy (/usr/lib64/ld-2.17.so)
        201 8e8c memcpy (/usr/lib64/ld-2.17.so) => 7f21ba678ea0 memcpy (/usr/lib64/ld-2.17.so)

   - Add support for using several Intel PT features (CYC, MTC
     packets). The relevant documentation was updated in
     tools/perf/Documentation/intel-pt.txt, briefly describing those
     packets, their purpose, how to configure them in the event config
     terms, and relevant external documentation for further reading.
     (Adrian Hunter)

   - Introduce support for probing at an absolute address, for user and
     kernel 'perf probe's, useful when one has the symbol maps on a
     developer machine but not on an embedded system. (Wang Nan)

   - Add Intel BTS support, with a call-graph script to show it and PT
     in use in a GUI using 'perf script' python scripting with
     postgresql and Qt. (Adrian Hunter)

   - Allow selecting the type of callchains per event, including
     disabling callchains in all but one entry in an event list, to
     save space, and also to ask for the callchains collected in one
     event to be used in other events. (Kan Liang)

   - Beautify more syscall arguments in 'perf trace': (Arnaldo Carvalho
     de Melo)

       * Translate a bunch more file/pathnames from pointers to
         strings.
       * Convert numbers to strings for the 'keyctl' syscall 'option'
         arg.
       * Add missing 'clockid' entries.

   - Introduce 'srcfile' sort key: (Andi Kleen)

        # perf record -F 10000 usleep 1
        # perf report --stdio --dsos '[kernel.vmlinux]' -s srcfile
        <SNIP>
        # Overhead  Source File
           26.49%   copy_page_64.S
            5.49%   signal.c
            0.51%   msr.h
        #

     It can be combined with other fields, for instance, experiment
     with '-s srcfile,symbol'. There are some oddities in some distros
     and with some specific DSOs, being investigated, so your mileage
     may vary.

   - Support per-event 'freq' term: (Namhyung Kim)

        $ perf record -e 'cpu/instructions,freq=1234/',cycles -c 1000 sleep 1
        $ perf evlist -F
        cpu/instructions,freq=1234/: sample_freq=1234
        cycles: sample_period=1000
        $

   - Deref sys_enter pointer args with contents from probe:vfs_getname,
     showing pathnames instead of pointers in many syscalls in 'perf
     trace'. (Arnaldo Carvalho de Melo)

   - Stop collecting /proc/kallsyms in perf.data files, saving about
     4.5MB on a typical x86-64 system, and use the symbol resolution
     routines used in all the other tools (report, top, etc) now that
     we can ask libtraceevent to use perf's symbol resolution code.
     (Arnaldo Carvalho de Melo)

   - Allow filtering out of perf's PID via
     'perf record --exclude-perf'. (Wang Nan)

   - 'perf trace' now supports syscall groups, like strace, i.e:

        $ trace -e file touch file

     will expand 'file' into multiple, file related, syscalls. More
     work is needed to add extra groups for other syscall groups, and
     also to complement what was added for the 'file' group, included
     as a proof of concept. (Arnaldo Carvalho de Melo)

   - Add lock_pi stresser to 'perf bench futex', to test the kernel
     code related to FUTEX_(UN)LOCK_PI. (Davidlohr Bueso)

   - Let user have timestamps with per-thread recording in
     'perf record'. (Adrian Hunter)

   - ...

  and tons of other changes, see the shortlog and the Git log for
  details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (240 commits)
  perf evlist: Add backpointer for perf_env to evlist
  perf tools: Rename perf_session_env to perf_env
  perf tools: Do not change lib/api/fs/debugfs directly
  perf tools: Add tracing_path and remove unneeded functions
  perf buildid: Introduce sysfs/filename__sprintf_build_id
  perf evsel: Add a backpointer to the evlist a evsel is in
  perf trace: Add header with copyright and background info
  perf scripts python: Add new compaction-times script
  perf stat: Get correct cpu id for print_aggr
  tools lib traceeveent: Allow for negative numbers in print format
  perf script: Add --[no-]-demangle/--[no-]-demangle-kernel
  tracing/uprobes: Do not print '0x (null)' when offset is 0
  perf probe: Support probing at absolute address
  perf probe: Fix error reported when offset without function
  perf probe: Fix list result when address is zero
  perf probe: Fix list result when symbol can't be found
  tools build: Allow duplicate objects in the object list
  perf tools: Remove export.h from MANIFEST
  perf probe: Prevent segfault when reading probe point with absolute address
  perf tools: Update Intel PT documentation
  ...
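As a rough end-to-end illustration of the PERF_RECORD_SWITCH feature mentioned above, here is a user-space sketch. It is not part of this merge: it assumes the 'context_switch' perf_event_attr bit that Adrian Hunter's series introduces, and it omits the ring-buffer mmap/parse code that a real consumer needs.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    struct perf_event_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = PERF_TYPE_SOFTWARE;
    attr.config = PERF_COUNT_SW_DUMMY;  /* no samples needed, just side-band records */
    attr.context_switch = 1;            /* ask for PERF_RECORD_SWITCH records (bit added by this series) */
    attr.sample_period = 1;

    /* one event, calling thread, any CPU */
    int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    if (fd < 0) {
        perror("perf_event_open");
        return 1;
    }
    /* PERF_RECORD_SWITCH records would now appear in the mmap'ed ring buffer */
    close(fd);
    return 0;
}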
-rw-r--r--  arch/x86/include/asm/msr-index.h | 14
-rw-r--r--  arch/x86/include/asm/perf_event.h | 7
-rw-r--r--  arch/x86/include/asm/tsc.h | 1
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/intel_pt.h | 39
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 25
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 280
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_bts.c | 3
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 106
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c | 58
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_pt.c | 85
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_rapl.c | 20
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 11
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 23
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 166
-rw-r--r--  arch/x86/kernel/cpu/perf_event_msr.c | 242
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 31
-rw-r--r--  arch/x86/kernel/tsc.c | 8
-rw-r--r--  arch/x86/kernel/uprobes.c | 9
-rw-r--r--  include/linux/kprobes.h | 2
-rw-r--r--  include/linux/trace_events.h | 7
-rw-r--r--  include/linux/uprobes.h | 17
-rw-r--r--  include/uapi/linux/perf_event.h | 35
-rw-r--r--  kernel/events/core.c | 109
-rw-r--r--  kernel/events/ring_buffer.c | 5
-rw-r--r--  kernel/events/uprobes.c | 228
-rw-r--r--  kernel/kprobes.c | 2
-rw-r--r--  kernel/trace/Kconfig | 2
-rw-r--r--  kernel/trace/trace_uprobe.c | 22
-rw-r--r--  tools/build/Documentation/Build.txt | 1
-rw-r--r--  tools/build/Makefile.build | 4
-rw-r--r--  tools/build/feature/Makefile | 13
-rw-r--r--  tools/build/feature/test-bpf.c | 18
-rw-r--r--  tools/build/feature/test-glibc.c | 11
-rw-r--r--  tools/build/tests/ex/Build | 1
-rw-r--r--  tools/lib/api/fs/debugfs.c | 15
-rw-r--r--  tools/lib/bpf/.gitignore | 2
-rw-r--r--  tools/lib/bpf/Build | 1
-rw-r--r--  tools/lib/bpf/Makefile | 195
-rw-r--r--  tools/lib/bpf/bpf.c | 85
-rw-r--r--  tools/lib/bpf/bpf.h | 23
-rw-r--r--  tools/lib/bpf/libbpf.c | 1037
-rw-r--r--  tools/lib/bpf/libbpf.h | 81
-rw-r--r--  tools/lib/traceevent/event-parse.c | 78
-rw-r--r--  tools/lib/traceevent/event-parse.h | 8
-rw-r--r--  tools/perf/.gitignore | 1
-rw-r--r--  tools/perf/Build | 1
-rw-r--r--  tools/perf/Documentation/intel-bts.txt | 86
-rw-r--r--  tools/perf/Documentation/intel-pt.txt | 766
-rw-r--r--  tools/perf/Documentation/itrace.txt | 22
-rw-r--r--  tools/perf/Documentation/perf-bench.txt | 4
-rw-r--r--  tools/perf/Documentation/perf-inject.txt | 23
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 30
-rw-r--r--  tools/perf/Documentation/perf-report.txt | 39
-rw-r--r--  tools/perf/Documentation/perf-script.txt | 37
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 21
-rw-r--r--  tools/perf/MANIFEST | 2
-rw-r--r--  tools/perf/Makefile.perf | 17
-rw-r--r--  tools/perf/arch/alpha/Build | 1
-rw-r--r--  tools/perf/arch/common.c | 4
-rw-r--r--  tools/perf/arch/common.h | 2
-rw-r--r--  tools/perf/arch/mips/Build | 1
-rw-r--r--  tools/perf/arch/parisc/Build | 1
-rw-r--r--  tools/perf/arch/x86/util/Build | 5
-rw-r--r--  tools/perf/arch/x86/util/auxtrace.c | 83
-rw-r--r--  tools/perf/arch/x86/util/intel-bts.c | 458
-rw-r--r--  tools/perf/arch/x86/util/intel-pt.c | 1007
-rw-r--r--  tools/perf/arch/x86/util/pmu.c | 18
-rw-r--r--  tools/perf/bench/Build | 1
-rw-r--r--  tools/perf/bench/bench.h | 2
-rw-r--r--  tools/perf/bench/futex-lock-pi.c | 219
-rw-r--r--  tools/perf/bench/futex.h | 20
-rw-r--r--  tools/perf/builtin-annotate.c | 4
-rw-r--r--  tools/perf/builtin-bench.c | 2
-rw-r--r--  tools/perf/builtin-buildid-cache.c | 30
-rw-r--r--  tools/perf/builtin-buildid-list.c | 28
-rw-r--r--  tools/perf/builtin-diff.c | 3
-rw-r--r--  tools/perf/builtin-inject.c | 1
-rw-r--r--  tools/perf/builtin-probe.c | 3
-rw-r--r--  tools/perf/builtin-record.c | 25
-rw-r--r--  tools/perf/builtin-report.c | 19
-rw-r--r--  tools/perf/builtin-script.c | 73
-rw-r--r--  tools/perf/builtin-stat.c | 230
-rw-r--r--  tools/perf/builtin-top.c | 9
-rw-r--r--  tools/perf/builtin-trace.c | 469
-rw-r--r--  tools/perf/config/Makefile | 14
-rw-r--r--  tools/perf/perf-with-kcore.sh | 28
-rw-r--r--  tools/perf/perf.c | 2
-rw-r--r--  tools/perf/perf.h | 3
-rwxr-xr-x  tools/perf/python/twatch.py | 12
-rw-r--r--  tools/perf/scripts/python/bin/compaction-times-record | 2
-rw-r--r--  tools/perf/scripts/python/bin/compaction-times-report | 4
-rw-r--r--  tools/perf/scripts/python/call-graph-from-postgresql.py | 327
-rw-r--r--  tools/perf/scripts/python/compaction-times.py | 311
-rw-r--r--  tools/perf/scripts/python/export-to-postgresql.py | 47
-rw-r--r--  tools/perf/tests/Build | 1
-rw-r--r--  tools/perf/tests/builtin-test.c | 4
-rw-r--r--  tools/perf/tests/hists_cumulate.c | 4
-rw-r--r--  tools/perf/tests/llvm.c | 98
-rw-r--r--  tools/perf/tests/make | 13
-rw-r--r--  tools/perf/tests/parse-events.c | 50
-rw-r--r--  tools/perf/tests/tests.h | 1
-rw-r--r--  tools/perf/tests/thread-map.c | 4
-rw-r--r--  tools/perf/trace/strace/groups/file | 18
-rw-r--r--  tools/perf/ui/browser.c | 17
-rw-r--r--  tools/perf/ui/browser.h | 7
-rw-r--r--  tools/perf/ui/browsers/annotate.c | 149
-rw-r--r--  tools/perf/ui/browsers/header.c | 4
-rw-r--r--  tools/perf/ui/browsers/hists.c | 66
-rw-r--r--  tools/perf/ui/browsers/map.c | 11
-rw-r--r--  tools/perf/ui/browsers/scripts.c | 2
-rw-r--r--  tools/perf/ui/libslang.h | 3
-rw-r--r--  tools/perf/ui/tui/progress.c | 19
-rw-r--r--  tools/perf/ui/tui/util.c | 2
-rw-r--r--  tools/perf/util/Build | 6
-rw-r--r--  tools/perf/util/annotate.c | 128
-rw-r--r--  tools/perf/util/annotate.h | 19
-rw-r--r--  tools/perf/util/auxtrace.c | 15
-rw-r--r--  tools/perf/util/auxtrace.h | 2
-rw-r--r--  tools/perf/util/build-id.c | 38
-rw-r--r--  tools/perf/util/build-id.h | 6
-rw-r--r--  tools/perf/util/callchain.c | 93
-rw-r--r--  tools/perf/util/callchain.h | 3
-rw-r--r--  tools/perf/util/cloexec.h | 2
-rw-r--r--  tools/perf/util/color.c | 21
-rw-r--r--  tools/perf/util/color.h | 1
-rw-r--r--  tools/perf/util/config.c | 4
-rw-r--r--  tools/perf/util/counts.c | 52
-rw-r--r--  tools/perf/util/counts.h | 37
-rw-r--r--  tools/perf/util/debug.c | 5
-rw-r--r--  tools/perf/util/debug.h | 1
-rw-r--r--  tools/perf/util/dso.h | 6
-rw-r--r--  tools/perf/util/dwarf-aux.c | 21
-rw-r--r--  tools/perf/util/event.c | 28
-rw-r--r--  tools/perf/util/event.h | 15
-rw-r--r--  tools/perf/util/evlist.c | 48
-rw-r--r--  tools/perf/util/evlist.h | 15
-rw-r--r--  tools/perf/util/evsel.c | 173
-rw-r--r--  tools/perf/util/evsel.h | 42
-rw-r--r--  tools/perf/util/header.c | 36
-rw-r--r--  tools/perf/util/header.h | 5
-rw-r--r--  tools/perf/util/hist.c | 59
-rw-r--r--  tools/perf/util/hist.h | 9
-rw-r--r--  tools/perf/util/intel-bts.c | 933
-rw-r--r--  tools/perf/util/intel-bts.h | 43
-rw-r--r--  tools/perf/util/intel-pt-decoder/Build | 11
-rw-r--r--  tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk | 386
-rw-r--r--  tools/perf/util/intel-pt-decoder/inat.c | 96
-rw-r--r--  tools/perf/util/intel-pt-decoder/inat.h | 221
-rw-r--r--  tools/perf/util/intel-pt-decoder/inat_types.h | 29
-rw-r--r--  tools/perf/util/intel-pt-decoder/insn.c | 594
-rw-r--r--  tools/perf/util/intel-pt-decoder/insn.h | 201
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2345
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 109
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 246
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h | 65
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-log.c | 155
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-log.h | 52
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c | 518
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h | 70
-rw-r--r--  tools/perf/util/intel-pt-decoder/x86-opcode-map.txt | 970
-rw-r--r--  tools/perf/util/intel-pt.c | 1956
-rw-r--r--  tools/perf/util/intel-pt.h | 56
-rw-r--r--  tools/perf/util/llvm-utils.c | 408
-rw-r--r--  tools/perf/util/llvm-utils.h | 49
-rw-r--r--  tools/perf/util/machine.c | 27
-rw-r--r--  tools/perf/util/machine.h | 6
-rw-r--r--  tools/perf/util/map.c | 27
-rw-r--r--  tools/perf/util/map.h | 7
-rw-r--r--  tools/perf/util/ordered-events.c | 3
-rw-r--r--  tools/perf/util/parse-events.c | 184
-rw-r--r--  tools/perf/util/parse-events.h | 5
-rw-r--r--  tools/perf/util/parse-events.l | 4
-rw-r--r--  tools/perf/util/pmu.c | 55
-rw-r--r--  tools/perf/util/pmu.h | 1
-rw-r--r--  tools/perf/util/probe-event.c | 603
-rw-r--r--  tools/perf/util/probe-event.h | 13
-rw-r--r--  tools/perf/util/probe-file.c | 301
-rw-r--r--  tools/perf/util/probe-file.h | 18
-rw-r--r--  tools/perf/util/probe-finder.c | 23
-rw-r--r--  tools/perf/util/python-ext-sources | 2
-rw-r--r--  tools/perf/util/python.c | 140
-rw-r--r--  tools/perf/util/record.c | 34
-rw-r--r--  tools/perf/util/session.c | 41
-rw-r--r--  tools/perf/util/sort.c | 80
-rw-r--r--  tools/perf/util/sort.h | 3
-rw-r--r--  tools/perf/util/srcline.c | 6
-rw-r--r--  tools/perf/util/stat.c | 188
-rw-r--r--  tools/perf/util/stat.h | 36
-rw-r--r--  tools/perf/util/string.c | 39
-rw-r--r--  tools/perf/util/strlist.c | 43
-rw-r--r--  tools/perf/util/strlist.h | 9
-rw-r--r--  tools/perf/util/symbol-elf.c | 13
-rw-r--r--  tools/perf/util/symbol.c | 27
-rw-r--r--  tools/perf/util/symbol.h | 7
-rw-r--r--  tools/perf/util/thread_map.c | 6
-rw-r--r--  tools/perf/util/tool.h | 1
-rw-r--r--  tools/perf/util/trace-event-info.c | 22
-rw-r--r--  tools/perf/util/trace-event-parse.c | 30
-rw-r--r--  tools/perf/util/trace-event-read.c | 28
-rw-r--r--  tools/perf/util/trace-event.c | 44
-rw-r--r--  tools/perf/util/trace-event.h | 2
-rw-r--r--  tools/perf/util/util.c | 147
-rw-r--r--  tools/perf/util/util.h | 17
206 files changed, 19187 insertions, 1584 deletions
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 9ebc3d009373..fcd17c1fc0c6 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -73,6 +73,12 @@
 #define MSR_LBR_CORE_FROM 0x00000040
 #define MSR_LBR_CORE_TO 0x00000060
 
+#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
+#define LBR_INFO_MISPRED BIT_ULL(63)
+#define LBR_INFO_IN_TX BIT_ULL(62)
+#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYCLES 0xffff
+
 #define MSR_IA32_PEBS_ENABLE 0x000003f1
 #define MSR_IA32_DS_AREA 0x00000600
 #define MSR_IA32_PERF_CAPABILITIES 0x00000345
@@ -80,13 +86,21 @@
 
 #define MSR_IA32_RTIT_CTL 0x00000570
 #define RTIT_CTL_TRACEEN BIT(0)
+#define RTIT_CTL_CYCLEACC BIT(1)
 #define RTIT_CTL_OS BIT(2)
 #define RTIT_CTL_USR BIT(3)
 #define RTIT_CTL_CR3EN BIT(7)
 #define RTIT_CTL_TOPA BIT(8)
+#define RTIT_CTL_MTC_EN BIT(9)
 #define RTIT_CTL_TSC_EN BIT(10)
 #define RTIT_CTL_DISRETC BIT(11)
 #define RTIT_CTL_BRANCH_EN BIT(13)
+#define RTIT_CTL_MTC_RANGE_OFFSET 14
+#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
+#define RTIT_CTL_CYC_THRESH_OFFSET 19
+#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
+#define RTIT_CTL_PSB_FREQ_OFFSET 24
+#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
 #define MSR_IA32_RTIT_STATUS 0x00000571
 #define RTIT_STATUS_CONTEXTEN BIT(1)
 #define RTIT_STATUS_TRIGGEREN BIT(2)
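The LBR_INFO_* masks added above are consumed by the LBR code later in this merge (see perf_event_intel_lbr.c below). As an illustrative sketch only, not part of the patch, decoding one MSR_LBR_INFO_* value with these masks looks roughly like this:

/* Sketch: unpack one LBR_INFO MSR value using the masks defined above. */
static inline void lbr_info_unpack(u64 info, int *mispred, int *in_tx,
                                   int *abort, u16 *cycles)
{
    *mispred = !!(info & LBR_INFO_MISPRED); /* bit 63 */
    *in_tx   = !!(info & LBR_INFO_IN_TX);   /* bit 62 */
    *abort   = !!(info & LBR_INFO_ABORT);   /* bit 61 */
    *cycles  = info & LBR_INFO_CYCLES;      /* low 16 bits: elapsed core cycles */
}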
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index dc0f6ed35b08..7bcb861a04e5 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -159,6 +159,13 @@ struct x86_pmu_capability {
  */
 #define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16)
 
+#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
+#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62)
+#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
+#define GLOBAL_STATUS_ASIF BIT_ULL(60)
+#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
+#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58)
+
 /*
  * IBS cpuid feature detection
  */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 94605c0e9cee..aad56eb3bbe2 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -51,6 +51,7 @@ extern int unsynchronized_tsc(void);
 extern int check_tsc_unstable(void);
 extern int check_tsc_disabled(void);
 extern unsigned long native_calibrate_tsc(void);
+extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
 
 extern int tsc_clocksource_reliable;
 
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 9bff68798836..4eb065c6bed2 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -46,6 +46,8 @@ obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
         perf_event_intel_uncore_snb.o \
         perf_event_intel_uncore_snbep.o \
         perf_event_intel_uncore_nhmex.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o
+obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o
 endif
 
 
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h
index 1c338b0eba05..336878a5d205 100644
--- a/arch/x86/kernel/cpu/intel_pt.h
+++ b/arch/x86/kernel/cpu/intel_pt.h
@@ -25,32 +25,11 @@
  */
 #define TOPA_PMI_MARGIN 512
 
-/*
- * Table of Physical Addresses bits
- */
-enum topa_sz {
-    TOPA_4K = 0,
-    TOPA_8K,
-    TOPA_16K,
-    TOPA_32K,
-    TOPA_64K,
-    TOPA_128K,
-    TOPA_256K,
-    TOPA_512K,
-    TOPA_1MB,
-    TOPA_2MB,
-    TOPA_4MB,
-    TOPA_8MB,
-    TOPA_16MB,
-    TOPA_32MB,
-    TOPA_64MB,
-    TOPA_128MB,
-    TOPA_SZ_END,
-};
+#define TOPA_SHIFT 12
 
-static inline unsigned int sizes(enum topa_sz tsz)
+static inline unsigned int sizes(unsigned int tsz)
 {
-    return 1 << (tsz + 12);
+    return 1 << (tsz + TOPA_SHIFT);
 };
 
 struct topa_entry {
@@ -66,20 +45,26 @@ struct topa_entry {
     u64 rsvd4 : 16;
 };
 
-#define TOPA_SHIFT 12
 #define PT_CPUID_LEAVES 2
+#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
 
 enum pt_capabilities {
     PT_CAP_max_subleaf = 0,
     PT_CAP_cr3_filtering,
+    PT_CAP_psb_cyc,
+    PT_CAP_mtc,
     PT_CAP_topa_output,
     PT_CAP_topa_multiple_entries,
+    PT_CAP_single_range_output,
     PT_CAP_payloads_lip,
+    PT_CAP_mtc_periods,
+    PT_CAP_cycle_thresholds,
+    PT_CAP_psb_periods,
 };
 
 struct pt_pmu {
     struct pmu pmu;
-    u32 caps[4 * PT_CPUID_LEAVES];
+    u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
 };
 
 /**
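The caps[] array is a flattened dump of the PT CPUID leaves. The sketch below shows the assumed indexing scheme (PT_CPUID_REGS_NUM registers stored per leaf, leaves back to back); the helper name and the __ffs()-based field extraction are illustrative assumptions, the real lookup lives in pt_cap_get() in perf_event_intel_pt.c further down.

/* Sketch (assumed layout): fetch one capability field from the caps[] dump. */
static u32 pt_cap_lookup(struct pt_pmu *pt, unsigned int leaf,
                         unsigned int reg, u32 mask)
{
    u32 c = pt->caps[leaf * PT_CPUID_REGS_NUM + reg];

    return (c & mask) >> __ffs(mask); /* right-justify the masked field */
}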
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9469dfa55607..f56cf074d01a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1551,7 +1551,7 @@ static void __init filter_events(struct attribute **attrs)
1551} 1551}
1552 1552
1553/* Merge two pointer arrays */ 1553/* Merge two pointer arrays */
1554static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b) 1554__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
1555{ 1555{
1556 struct attribute **new; 1556 struct attribute **new;
1557 int j, i; 1557 int j, i;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3e7fd27dfe20..5edf6d868fc1 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -165,7 +165,7 @@ struct intel_excl_cntrs {
     unsigned core_id; /* per-core: core id */
 };
 
-#define MAX_LBR_ENTRIES 16
+#define MAX_LBR_ENTRIES 32
 
 enum {
     X86_PERF_KFREE_SHARED = 0,
@@ -594,6 +594,7 @@ struct x86_pmu {
     struct event_constraint *pebs_constraints;
     void (*pebs_aliases)(struct perf_event *event);
     int max_pebs_events;
+    unsigned long free_running_flags;
 
     /*
      * Intel LBR
@@ -624,6 +625,7 @@ struct x86_pmu {
 struct x86_perf_task_context {
     u64 lbr_from[MAX_LBR_ENTRIES];
     u64 lbr_to[MAX_LBR_ENTRIES];
+    u64 lbr_info[MAX_LBR_ENTRIES];
     int lbr_callstack_users;
     int lbr_stack_state;
 };
@@ -793,6 +795,8 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
 ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
 ssize_t intel_event_sysfs_show(char *page, u64 config);
 
+struct attribute **merge_attr(struct attribute **a, struct attribute **b);
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
@@ -808,20 +812,6 @@ static inline int amd_pmu_init(void)
 
 #ifdef CONFIG_CPU_SUP_INTEL
 
-static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
-{
-    /* user explicitly requested branch sampling */
-    if (has_branch_stack(event))
-        return true;
-
-    /* implicit branch sampling to correct PEBS skid */
-    if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
-        x86_pmu.intel_cap.pebs_format < 2)
-        return true;
-
-    return false;
-}
-
 static inline bool intel_pmu_has_bts(struct perf_event *event)
 {
     if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
@@ -873,6 +863,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[];
 
 extern struct event_constraint intel_hsw_pebs_event_constraints[];
 
+extern struct event_constraint intel_skl_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
@@ -911,6 +903,8 @@ void intel_pmu_lbr_init_snb(void);
 
 void intel_pmu_lbr_init_hsw(void);
 
+void intel_pmu_lbr_init_skl(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
@@ -934,6 +928,7 @@ static inline int is_ht_workaround_enabled(void)
 {
     return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
 }
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6326ae24e4d5..3f124d553c5a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -177,6 +177,14 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
     EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_skl_event_constraints[] = {
+    FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+    FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+    FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+    INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
+    EVENT_CONSTRAINT_END
+};
+
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
     /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
     INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
@@ -193,6 +201,13 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
     EVENT_EXTRA_END
 };
 
+static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
+    INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+    INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+    INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+    EVENT_EXTRA_END
+};
+
 EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
 EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
 EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
@@ -244,6 +259,200 @@ static u64 intel_pmu_event_map(int hw_event)
     return intel_perfmon_event_map[hw_event];
 }
 
+/*
+ * Notes on the events:
+ * - data reads do not include code reads (comparable to earlier tables)
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
+ * - remote node access includes remote memory, remote cache, remote mmio.
+ * - prefetches are not included in the counts.
+ * - icache miss does not include decoded icache
+ */
+
+#define SKL_DEMAND_DATA_RD BIT_ULL(0)
+#define SKL_DEMAND_RFO BIT_ULL(1)
+#define SKL_ANY_RESPONSE BIT_ULL(16)
+#define SKL_SUPPLIER_NONE BIT_ULL(17)
+#define SKL_L3_MISS_LOCAL_DRAM BIT_ULL(26)
+#define SKL_L3_MISS_REMOTE_HOP0_DRAM BIT_ULL(27)
+#define SKL_L3_MISS_REMOTE_HOP1_DRAM BIT_ULL(28)
+#define SKL_L3_MISS_REMOTE_HOP2P_DRAM BIT_ULL(29)
+#define SKL_L3_MISS (SKL_L3_MISS_LOCAL_DRAM| \
+                     SKL_L3_MISS_REMOTE_HOP0_DRAM| \
+                     SKL_L3_MISS_REMOTE_HOP1_DRAM| \
+                     SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+#define SKL_SPL_HIT BIT_ULL(30)
+#define SKL_SNOOP_NONE BIT_ULL(31)
+#define SKL_SNOOP_NOT_NEEDED BIT_ULL(32)
+#define SKL_SNOOP_MISS BIT_ULL(33)
+#define SKL_SNOOP_HIT_NO_FWD BIT_ULL(34)
+#define SKL_SNOOP_HIT_WITH_FWD BIT_ULL(35)
+#define SKL_SNOOP_HITM BIT_ULL(36)
+#define SKL_SNOOP_NON_DRAM BIT_ULL(37)
+#define SKL_ANY_SNOOP (SKL_SPL_HIT|SKL_SNOOP_NONE| \
+                       SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
+                       SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
+                       SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
+#define SKL_DEMAND_READ SKL_DEMAND_DATA_RD
+#define SKL_SNOOP_DRAM (SKL_SNOOP_NONE| \
+                        SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
+                        SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
+                        SKL_SNOOP_HITM|SKL_SPL_HIT)
+#define SKL_DEMAND_WRITE SKL_DEMAND_RFO
+#define SKL_LLC_ACCESS SKL_ANY_RESPONSE
+#define SKL_L3_MISS_REMOTE (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
+                            SKL_L3_MISS_REMOTE_HOP1_DRAM| \
+                            SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+
+static __initconst const u64 skl_hw_cache_event_ids
+                [PERF_COUNT_HW_CACHE_MAX]
+                [PERF_COUNT_HW_CACHE_OP_MAX]
+                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
+        [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
+        [ C(RESULT_MISS) ] = 0x0,
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0,
+        [ C(RESULT_MISS) ] = 0x0,
+    },
+ },
+ [ C(L1I ) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0,
+        [ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = -1,
+        [ C(RESULT_MISS) ] = -1,
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0,
+        [ C(RESULT_MISS) ] = 0x0,
+    },
+ },
+ [ C(LL ) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+        [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+        [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0,
+        [ C(RESULT_MISS) ] = 0x0,
+    },
+ },
+ [ C(DTLB) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
+        [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
+        [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0,
+        [ C(RESULT_MISS) ] = 0x0,
+    },
+ },
+ [ C(ITLB) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */
+        [ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = -1,
+        [ C(RESULT_MISS) ] = -1,
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = -1,
+        [ C(RESULT_MISS) ] = -1,
+    },
+ },
+ [ C(BPU ) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
+        [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = -1,
+        [ C(RESULT_MISS) ] = -1,
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = -1,
+        [ C(RESULT_MISS) ] = -1,
+    },
+ },
+ [ C(NODE) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+        [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+        [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0,
+        [ C(RESULT_MISS) ] = 0x0,
+    },
+ },
+};
+
+static __initconst const u64 skl_hw_cache_extra_regs
+                [PERF_COUNT_HW_CACHE_MAX]
+                [PERF_COUNT_HW_CACHE_OP_MAX]
+                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
+                       SKL_LLC_ACCESS|SKL_ANY_SNOOP,
+        [ C(RESULT_MISS) ] = SKL_DEMAND_READ|
+                       SKL_L3_MISS|SKL_ANY_SNOOP|
+                       SKL_SUPPLIER_NONE,
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
+                       SKL_LLC_ACCESS|SKL_ANY_SNOOP,
+        [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
+                       SKL_L3_MISS|SKL_ANY_SNOOP|
+                       SKL_SUPPLIER_NONE,
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0,
+        [ C(RESULT_MISS) ] = 0x0,
+    },
+ },
+ [ C(NODE) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
+                       SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
+        [ C(RESULT_MISS) ] = SKL_DEMAND_READ|
+                       SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
+                       SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
+        [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
+                       SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0,
+        [ C(RESULT_MISS) ] = 0x0,
+    },
+ },
+};
+
 #define SNB_DMND_DATA_RD (1ULL << 0)
 #define SNB_DMND_RFO (1ULL << 1)
 #define SNB_DMND_IFETCH (1ULL << 2)
@@ -1114,7 +1323,7 @@ static struct extra_reg intel_slm_extra_regs[] __read_mostly =
 {
     /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
     INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
-    INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
+    INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
     EVENT_EXTRA_END
 };
 
@@ -1594,6 +1803,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
     loops = 0;
 again:
+    intel_pmu_lbr_read();
     intel_pmu_ack_status(status);
     if (++loops > 100) {
         static bool warned = false;
@@ -1608,16 +1818,16 @@ again:
 
     inc_irq_stat(apic_perf_irqs);
 
-    intel_pmu_lbr_read();
 
     /*
-     * CondChgd bit 63 doesn't mean any overflow status. Ignore
-     * and clear the bit.
+     * Ignore a range of extra bits in status that do not indicate
+     * overflow by themselves.
      */
-    if (__test_and_clear_bit(63, (unsigned long *)&status)) {
-        if (!status)
-            goto done;
-    }
+    status &= ~(GLOBAL_STATUS_COND_CHG |
+                GLOBAL_STATUS_ASIF |
+                GLOBAL_STATUS_LBRS_FROZEN);
+    if (!status)
+        goto done;
 
     /*
      * PEBS overflow sets bit 62 in the global status register
@@ -1699,18 +1909,22 @@ intel_bts_constraints(struct perf_event *event)
     return NULL;
 }
 
-static int intel_alt_er(int idx)
+static int intel_alt_er(int idx, u64 config)
 {
+    int alt_idx;
     if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
         return idx;
 
     if (idx == EXTRA_REG_RSP_0)
-        return EXTRA_REG_RSP_1;
+        alt_idx = EXTRA_REG_RSP_1;
 
     if (idx == EXTRA_REG_RSP_1)
-        return EXTRA_REG_RSP_0;
+        alt_idx = EXTRA_REG_RSP_0;
 
-    return idx;
+    if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
+        return idx;
+
+    return alt_idx;
 }
 
 static void intel_fixup_er(struct perf_event *event, int idx)
@@ -1799,7 +2013,7 @@ again:
      */
         c = NULL;
     } else {
-        idx = intel_alt_er(idx);
+        idx = intel_alt_er(idx, reg->config);
        if (idx != reg->idx) {
             raw_spin_unlock_irqrestore(&era->lock, flags);
             goto again;
@@ -2253,6 +2467,15 @@ static void intel_pebs_aliases_snb(struct perf_event *event)
     }
 }
 
+static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+{
+    unsigned long flags = x86_pmu.free_running_flags;
+
+    if (event->attr.use_clockid)
+        flags &= ~PERF_SAMPLE_TIME;
+    return flags;
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
     int ret = x86_pmu_hw_config(event);
@@ -2263,7 +2486,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
     if (event->attr.precise_ip) {
         if (!event->attr.freq) {
             event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
-            if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
+            if (!(event->attr.sample_type &
+                  ~intel_pmu_free_running_flags(event)))
                 event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
         }
         if (x86_pmu.pebs_aliases)
@@ -2694,6 +2918,8 @@ static __initconst const struct x86_pmu core_pmu = {
     .event_map = intel_pmu_event_map,
     .max_events = ARRAY_SIZE(intel_perfmon_event_map),
     .apic = 1,
+    .free_running_flags = PEBS_FREERUNNING_FLAGS,
+
     /*
      * Intel PMCs cannot be accessed sanely above 32-bit width,
      * so we install an artificial 1<<31 period regardless of
@@ -2732,6 +2958,7 @@ static __initconst const struct x86_pmu intel_pmu = {
     .event_map = intel_pmu_event_map,
     .max_events = ARRAY_SIZE(intel_perfmon_event_map),
     .apic = 1,
+    .free_running_flags = PEBS_FREERUNNING_FLAGS,
     /*
      * Intel PMCs cannot be accessed sanely above 32 bit width,
     * so we install an artificial 1<<31 period regardless of
@@ -3269,6 +3496,29 @@ __init int intel_pmu_init(void)
         pr_cont("Broadwell events, ");
         break;
 
+    case 78: /* 14nm Skylake Mobile */
+    case 94: /* 14nm Skylake Desktop */
+        x86_pmu.late_ack = true;
+        memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+        memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+        intel_pmu_lbr_init_skl();
+
+        x86_pmu.event_constraints = intel_skl_event_constraints;
+        x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
+        x86_pmu.extra_regs = intel_skl_extra_regs;
+        x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+        /* all extra regs are per-cpu when HT is on */
+        x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+        x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+        x86_pmu.hw_config = hsw_hw_config;
+        x86_pmu.get_event_constraints = hsw_get_event_constraints;
+        x86_pmu.cpu_events = hsw_events_attrs;
+        WARN_ON(!x86_pmu.format_attrs);
+        x86_pmu.cpu_events = hsw_events_attrs;
+        pr_cont("Skylake events, ");
+        break;
+
     default:
         switch (x86_pmu.version) {
         case 1:
@@ -3338,7 +3588,7 @@ __init int intel_pmu_init(void)
      */
     if (x86_pmu.extra_regs) {
         for (er = x86_pmu.extra_regs; er->msr; er++) {
-            er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
+            er->extra_msr_access = check_msr(er->msr, 0x11UL);
             /* Disable LBR select mapping */
             if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
                 x86_pmu.lbr_sel_map = NULL;
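To summarize the free-running PEBS gate added above: a minimal restatement (not from the patch itself) of the test that intel_pmu_hw_config() now applies. An event qualifies for the free-running PEBS buffer only if every requested sample_type bit is covered by the PMU's free_running_flags, and PERF_SAMPLE_TIME only counts as covered on PEBS v3 with the default clock (see intel_pmu_free_running_flags() above).

/* Sketch: the eligibility test intel_pmu_hw_config() performs. */
static bool can_use_free_running(u64 sample_type,
                                 unsigned long free_running_flags)
{
    /* every requested bit must be one the hardware records by itself */
    return !(sample_type & ~free_running_flags);
}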
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index 43dd672d788b..54690e885759 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -62,9 +62,6 @@ struct bts_buffer {
62 62
63struct pmu bts_pmu; 63struct pmu bts_pmu;
64 64
65void intel_pmu_enable_bts(u64 config);
66void intel_pmu_disable_bts(void);
67
68static size_t buf_size(struct page *page) 65static size_t buf_size(struct page *page)
69{ 66{
70 return 1 << (PAGE_SHIFT + page_private(page)); 67 return 1 << (PAGE_SHIFT + page_private(page));
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 71fc40238843..84f236ab96b0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -224,6 +224,19 @@ union hsw_tsx_tuning {
 
 #define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
 
+/* Same as HSW, plus TSC */
+
+struct pebs_record_skl {
+    u64 flags, ip;
+    u64 ax, bx, cx, dx;
+    u64 si, di, bp, sp;
+    u64 r8, r9, r10, r11;
+    u64 r12, r13, r14, r15;
+    u64 status, dla, dse, lat;
+    u64 real_ip, tsx_tuning;
+    u64 tsc;
+};
+
 void init_debug_store_on_cpu(int cpu)
 {
     struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -675,6 +688,28 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
     EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_skl_pebs_event_constraints[] = {
+    INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
+    INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+    /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+    INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+    INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
+    INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
+    INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
+    INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
+    INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
+    INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
+    INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
+    INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
+    INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
+    INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
+    INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
+    INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */
+    /* Allow all events as PEBS with no flags */
+    INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+    EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
     struct event_constraint *c;
@@ -754,6 +789,11 @@ void intel_pmu_pebs_disable(struct perf_event *event)
     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
     struct hw_perf_event *hwc = &event->hw;
     struct debug_store *ds = cpuc->ds;
+    bool large_pebs = ds->pebs_interrupt_threshold >
+        ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+    if (large_pebs)
+        intel_pmu_drain_pebs_buffer();
 
     cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
 
@@ -762,12 +802,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
     else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
         cpuc->pebs_enabled &= ~(1ULL << 63);
 
-    if (ds->pebs_interrupt_threshold >
-        ds->pebs_buffer_base + x86_pmu.pebs_record_size) {
-        intel_pmu_drain_pebs_buffer();
-        if (!pebs_is_enabled(cpuc))
-            perf_sched_cb_dec(event->ctx->pmu);
-    }
+    if (large_pebs && !pebs_is_enabled(cpuc))
+        perf_sched_cb_dec(event->ctx->pmu);
 
     if (cpuc->enabled)
         wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -885,7 +921,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
     return 0;
 }
 
-static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
 {
     if (pebs->tsx_tuning) {
         union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
@@ -894,7 +930,7 @@ static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
     return 0;
 }
 
-static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
+static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
 {
     u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
 
@@ -918,7 +954,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
      * unconditionally access the 'extra' entries.
      */
     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-    struct pebs_record_hsw *pebs = __pebs;
+    struct pebs_record_skl *pebs = __pebs;
     u64 sample_type;
     int fll, fst, dsrc;
     int fl = event->hw.flags;
@@ -1016,6 +1052,16 @@ static void setup_pebs_sample_data(struct perf_event *event,
         data->txn = intel_hsw_transaction(pebs);
     }
 
+    /*
+     * v3 supplies an accurate time stamp, so we use that
+     * for the time stamp.
+     *
+     * We can only do this for the default trace clock.
+     */
+    if (x86_pmu.intel_cap.pebs_format >= 3 &&
+        event->attr.use_clockid == 0)
+        data->time = native_sched_clock_from_tsc(pebs->tsc);
+
     if (has_branch_stack(event))
         data->br_stack = &cpuc->lbr_stack;
 }
@@ -1142,6 +1188,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 
     for (at = base; at < top; at += x86_pmu.pebs_record_size) {
         struct pebs_record_nhm *p = at;
+        u64 pebs_status;
 
         /* PEBS v3 has accurate status bits */
         if (x86_pmu.intel_cap.pebs_format >= 3) {
@@ -1152,12 +1199,17 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
             continue;
         }
 
-        bit = find_first_bit((unsigned long *)&p->status,
+        pebs_status = p->status & cpuc->pebs_enabled;
+        pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+
+        bit = find_first_bit((unsigned long *)&pebs_status,
             x86_pmu.max_pebs_events);
-        if (bit >= x86_pmu.max_pebs_events)
-            continue;
-        if (!test_bit(bit, cpuc->active_mask))
+        if (WARN(bit >= x86_pmu.max_pebs_events,
+            "PEBS record without PEBS event! status=%Lx pebs_enabled=%Lx active_mask=%Lx",
+            (unsigned long long)p->status, (unsigned long long)cpuc->pebs_enabled,
+            *(unsigned long long *)cpuc->active_mask))
             continue;
+
         /*
          * The PEBS hardware does not deal well with the situation
          * when events happen near to each other and multiple bits
@@ -1172,27 +1224,21 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
          * one, and it's not possible to reconstruct all events
          * that caused the PEBS record. It's called collision.
         * If collision happened, the record will be dropped.
-         *
         */
-        if (p->status != (1 << bit)) {
-            u64 pebs_status;
-
-            /* slow path */
-            pebs_status = p->status & cpuc->pebs_enabled;
-            pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
-            if (pebs_status != (1 << bit)) {
-                for_each_set_bit(i, (unsigned long *)&pebs_status,
-                                 MAX_PEBS_EVENTS)
-                    error[i]++;
-                continue;
-            }
+        if (p->status != (1ULL << bit)) {
+            for_each_set_bit(i, (unsigned long *)&pebs_status,
+                             x86_pmu.max_pebs_events)
+                error[i]++;
+            continue;
         }
+
         counts[bit]++;
     }
 
     for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
         if ((counts[bit] == 0) && (error[bit] == 0))
             continue;
+
         event = cpuc->events[bit];
         WARN_ON_ONCE(!event);
         WARN_ON_ONCE(!event->attr.precise_ip);
@@ -1245,6 +1291,14 @@ void __init intel_ds_init(void)
         x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
         break;
 
+    case 3:
+        pr_cont("PEBS fmt3%c, ", pebs_type);
+        x86_pmu.pebs_record_size =
+            sizeof(struct pebs_record_skl);
+        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+        x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
+        break;
+
     default:
         printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
         x86_pmu.pebs = 0;
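One subtle fix above is the change from (1 << bit) to (1ULL << bit) when comparing against the 64-bit PEBS status word. A minimal illustration (my example, not from the patch): with plain int arithmetic the shift overflows for large bit values and the result is promoted incorrectly against a u64, so the expression has to stay 64-bit throughout.

#include <stdbool.h>
#include <stdint.h>

/* Returns true iff exactly the given record bit is set in status. */
static bool only_bit_set(uint64_t status, unsigned int bit)
{
    /* 1 << bit would be a 32-bit int shift, undefined for bit >= 31 */
    return status == (1ULL << bit);
}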
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 452a7bd2dedb..b2c9475b7ff2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -13,7 +13,8 @@ enum {
13 LBR_FORMAT_EIP = 0x02, 13 LBR_FORMAT_EIP = 0x02,
14 LBR_FORMAT_EIP_FLAGS = 0x03, 14 LBR_FORMAT_EIP_FLAGS = 0x03,
15 LBR_FORMAT_EIP_FLAGS2 = 0x04, 15 LBR_FORMAT_EIP_FLAGS2 = 0x04,
16 LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2, 16 LBR_FORMAT_INFO = 0x05,
17 LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO,
17}; 18};
18 19
19static enum { 20static enum {
@@ -140,6 +141,13 @@ static void __intel_pmu_lbr_enable(bool pmi)
140 u64 debugctl, lbr_select = 0, orig_debugctl; 141 u64 debugctl, lbr_select = 0, orig_debugctl;
141 142
142 /* 143 /*
144 * No need to unfreeze manually, as v4 can do that as part
145 * of the GLOBAL_STATUS ack.
146 */
147 if (pmi && x86_pmu.version >= 4)
148 return;
149
150 /*
143 * No need to reprogram LBR_SELECT in a PMI, as it 151 * No need to reprogram LBR_SELECT in a PMI, as it
144 * did not change. 152 * did not change.
145 */ 153 */
@@ -186,6 +194,8 @@ static void intel_pmu_lbr_reset_64(void)
186 for (i = 0; i < x86_pmu.lbr_nr; i++) { 194 for (i = 0; i < x86_pmu.lbr_nr; i++) {
187 wrmsrl(x86_pmu.lbr_from + i, 0); 195 wrmsrl(x86_pmu.lbr_from + i, 0);
188 wrmsrl(x86_pmu.lbr_to + i, 0); 196 wrmsrl(x86_pmu.lbr_to + i, 0);
197 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
198 wrmsrl(MSR_LBR_INFO_0 + i, 0);
189 } 199 }
190} 200}
191 201
@@ -230,10 +240,12 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
230 240
231 mask = x86_pmu.lbr_nr - 1; 241 mask = x86_pmu.lbr_nr - 1;
232 tos = intel_pmu_lbr_tos(); 242 tos = intel_pmu_lbr_tos();
233 for (i = 0; i < x86_pmu.lbr_nr; i++) { 243 for (i = 0; i < tos; i++) {
234 lbr_idx = (tos - i) & mask; 244 lbr_idx = (tos - i) & mask;
235 wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); 245 wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
236 wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); 246 wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
247 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
248 wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
237 } 249 }
238 task_ctx->lbr_stack_state = LBR_NONE; 250 task_ctx->lbr_stack_state = LBR_NONE;
239} 251}
@@ -251,10 +263,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
251 263
252 mask = x86_pmu.lbr_nr - 1; 264 mask = x86_pmu.lbr_nr - 1;
253 tos = intel_pmu_lbr_tos(); 265 tos = intel_pmu_lbr_tos();
254 for (i = 0; i < x86_pmu.lbr_nr; i++) { 266 for (i = 0; i < tos; i++) {
255 lbr_idx = (tos - i) & mask; 267 lbr_idx = (tos - i) & mask;
256 rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); 268 rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
257 rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); 269 rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
270 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
271 rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
258 } 272 }
259 task_ctx->lbr_stack_state = LBR_VALID; 273 task_ctx->lbr_stack_state = LBR_VALID;
260} 274}
@@ -411,16 +425,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
411 u64 tos = intel_pmu_lbr_tos(); 425 u64 tos = intel_pmu_lbr_tos();
412 int i; 426 int i;
413 int out = 0; 427 int out = 0;
428 int num = x86_pmu.lbr_nr;
414 429
415 for (i = 0; i < x86_pmu.lbr_nr; i++) { 430 if (cpuc->lbr_sel->config & LBR_CALL_STACK)
431 num = tos;
432
433 for (i = 0; i < num; i++) {
416 unsigned long lbr_idx = (tos - i) & mask; 434 unsigned long lbr_idx = (tos - i) & mask;
417 u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; 435 u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
418 int skip = 0; 436 int skip = 0;
437 u16 cycles = 0;
419 int lbr_flags = lbr_desc[lbr_format]; 438 int lbr_flags = lbr_desc[lbr_format];
420 439
421 rdmsrl(x86_pmu.lbr_from + lbr_idx, from); 440 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
422 rdmsrl(x86_pmu.lbr_to + lbr_idx, to); 441 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
423 442
443 if (lbr_format == LBR_FORMAT_INFO) {
444 u64 info;
445
446 rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
447 mis = !!(info & LBR_INFO_MISPRED);
448 pred = !mis;
449 in_tx = !!(info & LBR_INFO_IN_TX);
450 abort = !!(info & LBR_INFO_ABORT);
451 cycles = (info & LBR_INFO_CYCLES);
452 }
424 if (lbr_flags & LBR_EIP_FLAGS) { 453 if (lbr_flags & LBR_EIP_FLAGS) {
425 mis = !!(from & LBR_FROM_FLAG_MISPRED); 454 mis = !!(from & LBR_FROM_FLAG_MISPRED);
426 pred = !mis; 455 pred = !mis;
@@ -450,6 +479,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
450 cpuc->lbr_entries[out].predicted = pred; 479 cpuc->lbr_entries[out].predicted = pred;
451 cpuc->lbr_entries[out].in_tx = in_tx; 480 cpuc->lbr_entries[out].in_tx = in_tx;
452 cpuc->lbr_entries[out].abort = abort; 481 cpuc->lbr_entries[out].abort = abort;
482 cpuc->lbr_entries[out].cycles = cycles;
453 cpuc->lbr_entries[out].reserved = 0; 483 cpuc->lbr_entries[out].reserved = 0;
454 out++; 484 out++;
455 } 485 }
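
The LBR_FORMAT_INFO path above pulls the flags out of the new per-entry MSR_LBR_INFO_x register instead of the FROM address. A minimal userspace sketch of that decode, assuming the usual Skylake layout (mispredict in bit 63, in_tx in bit 62, abort in bit 61, cycle count in bits 15:0):

    #include <stdint.h>

    struct lbr_info {
            unsigned int mispred, in_tx, abort;
            uint16_t cycles;
    };

    /* Decode one 64-bit LBR_INFO value the same way the loop above does. */
    static struct lbr_info decode_lbr_info(uint64_t info)
    {
            struct lbr_info d = {
                    .mispred = !!(info & (1ULL << 63)),   /* LBR_INFO_MISPRED */
                    .in_tx   = !!(info & (1ULL << 62)),   /* LBR_INFO_IN_TX   */
                    .abort   = !!(info & (1ULL << 61)),   /* LBR_INFO_ABORT   */
                    .cycles  = (uint16_t)(info & 0xffff), /* LBR_INFO_CYCLES  */
            };
            return d;
    }

pred is then just the complement of mispred, exactly as in the kernel loop.
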
@@ -947,6 +977,26 @@ void intel_pmu_lbr_init_hsw(void)
947 pr_cont("16-deep LBR, "); 977 pr_cont("16-deep LBR, ");
948} 978}
949 979
980/* skylake */
981__init void intel_pmu_lbr_init_skl(void)
982{
983 x86_pmu.lbr_nr = 32;
984 x86_pmu.lbr_tos = MSR_LBR_TOS;
985 x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
986 x86_pmu.lbr_to = MSR_LBR_NHM_TO;
987
988 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
989 x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
990
991 /*
992 * SW branch filter usage:
993 * - support syscall, sysret capture.
994 * That requires LBR_FAR, but that means far
995 * jmps need to be filtered out.
996 */
997 pr_cont("32-deep LBR, ");
998}
999
950/* atom */ 1000/* atom */
951void __init intel_pmu_lbr_init_atom(void) 1001void __init intel_pmu_lbr_init_atom(void)
952{ 1002{
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 183de719628d..42169283448b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -65,15 +65,21 @@ static struct pt_cap_desc {
65} pt_caps[] = { 65} pt_caps[] = {
66 PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff), 66 PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff),
67 PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)), 67 PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)),
68 PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)),
69 PT_CAP(mtc, 0, CR_EBX, BIT(3)),
68 PT_CAP(topa_output, 0, CR_ECX, BIT(0)), 70 PT_CAP(topa_output, 0, CR_ECX, BIT(0)),
69 PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)), 71 PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)),
72 PT_CAP(single_range_output, 0, CR_ECX, BIT(2)),
70 PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)), 73 PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)),
74 PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000),
75 PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff),
76 PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000),
71}; 77};
72 78
73static u32 pt_cap_get(enum pt_capabilities cap) 79static u32 pt_cap_get(enum pt_capabilities cap)
74{ 80{
75 struct pt_cap_desc *cd = &pt_caps[cap]; 81 struct pt_cap_desc *cd = &pt_caps[cap];
76 u32 c = pt_pmu.caps[cd->leaf * 4 + cd->reg]; 82 u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
77 unsigned int shift = __ffs(cd->mask); 83 unsigned int shift = __ffs(cd->mask);
78 84
79 return (c & cd->mask) >> shift; 85 return (c & cd->mask) >> shift;
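
pt_cap_get() treats pt_pmu.caps[] as a leaf-major array of the four CPUID output registers and isolates one capability with a mask plus a shift derived from the mask's lowest set bit. A standalone sketch of the same arithmetic (the example lookup at the end is illustrative):

    #include <stdint.h>

    #define PT_CPUID_REGS_NUM 4                /* EAX, EBX, ECX, EDX */
    enum pt_reg { REG_EAX, REG_EBX, REG_ECX, REG_EDX };

    /* caps[] is laid out as caps[leaf * PT_CPUID_REGS_NUM + reg]. */
    static uint32_t cap_get(const uint32_t *caps, unsigned int leaf,
                            enum pt_reg reg, uint32_t mask)
    {
            uint32_t c = caps[leaf * PT_CPUID_REGS_NUM + reg];
            unsigned int shift = __builtin_ctz(mask); /* like __ffs(mask) */

            return (c & mask) >> shift;
    }

    /* e.g. mtc_periods (subleaf 1, EAX bits 31:16):
     *      cap_get(caps, 1, REG_EAX, 0xffff0000)   */
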
@@ -94,12 +100,22 @@ static struct attribute_group pt_cap_group = {
94 .name = "caps", 100 .name = "caps",
95}; 101};
96 102
103PMU_FORMAT_ATTR(cyc, "config:1" );
104PMU_FORMAT_ATTR(mtc, "config:9" );
97PMU_FORMAT_ATTR(tsc, "config:10" ); 105PMU_FORMAT_ATTR(tsc, "config:10" );
98PMU_FORMAT_ATTR(noretcomp, "config:11" ); 106PMU_FORMAT_ATTR(noretcomp, "config:11" );
107PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
108PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
109PMU_FORMAT_ATTR(psb_period, "config:24-27" );
99 110
100static struct attribute *pt_formats_attr[] = { 111static struct attribute *pt_formats_attr[] = {
112 &format_attr_cyc.attr,
113 &format_attr_mtc.attr,
101 &format_attr_tsc.attr, 114 &format_attr_tsc.attr,
102 &format_attr_noretcomp.attr, 115 &format_attr_noretcomp.attr,
116 &format_attr_mtc_period.attr,
117 &format_attr_cyc_thresh.attr,
118 &format_attr_psb_period.attr,
103 NULL, 119 NULL,
104}; 120};
105 121
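
Each PMU_FORMAT_ATTR() above publishes where an event term lives inside perf_event_attr.config, so a command-line spec like cyc,mtc,mtc_period=3 ends up in the matching RTIT_CTL bit fields. A hedged sketch packing the same fields:

    #include <stdint.h>

    static uint64_t pt_config_word(unsigned int cyc, unsigned int mtc,
                                   unsigned int tsc, unsigned int mtc_period,
                                   unsigned int cyc_thresh,
                                   unsigned int psb_period)
    {
            uint64_t cfg = 0;

            cfg |= (uint64_t)(cyc & 1)          << 1;  /* config:1     */
            cfg |= (uint64_t)(mtc & 1)          << 9;  /* config:9     */
            cfg |= (uint64_t)(tsc & 1)          << 10; /* config:10    */
            cfg |= (uint64_t)(mtc_period & 0xf) << 14; /* config:14-17 */
            cfg |= (uint64_t)(cyc_thresh & 0xf) << 19; /* config:19-22 */
            cfg |= (uint64_t)(psb_period & 0xf) << 24; /* config:24-27 */
            return cfg;
    }
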
@@ -129,10 +145,10 @@ static int __init pt_pmu_hw_init(void)
129 145
130 for (i = 0; i < PT_CPUID_LEAVES; i++) { 146 for (i = 0; i < PT_CPUID_LEAVES; i++) {
131 cpuid_count(20, i, 147 cpuid_count(20, i,
132 &pt_pmu.caps[CR_EAX + i*4], 148 &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
133 &pt_pmu.caps[CR_EBX + i*4], 149 &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
134 &pt_pmu.caps[CR_ECX + i*4], 150 &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
135 &pt_pmu.caps[CR_EDX + i*4]); 151 &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
136 } 152 }
137 153
138 ret = -ENOMEM; 154 ret = -ENOMEM;
@@ -170,15 +186,65 @@ fail:
170 return ret; 186 return ret;
171} 187}
172 188
173#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC) 189#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC | \
190 RTIT_CTL_CYC_THRESH | \
191 RTIT_CTL_PSB_FREQ)
192
193#define RTIT_CTL_MTC (RTIT_CTL_MTC_EN | \
194 RTIT_CTL_MTC_RANGE)
195
196#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \
197 RTIT_CTL_DISRETC | \
198 RTIT_CTL_CYC_PSB | \
199 RTIT_CTL_MTC)
174 200
175static bool pt_event_valid(struct perf_event *event) 201static bool pt_event_valid(struct perf_event *event)
176{ 202{
177 u64 config = event->attr.config; 203 u64 config = event->attr.config;
204 u64 allowed, requested;
178 205
179 if ((config & PT_CONFIG_MASK) != config) 206 if ((config & PT_CONFIG_MASK) != config)
180 return false; 207 return false;
181 208
209 if (config & RTIT_CTL_CYC_PSB) {
210 if (!pt_cap_get(PT_CAP_psb_cyc))
211 return false;
212
213 allowed = pt_cap_get(PT_CAP_psb_periods);
214 requested = (config & RTIT_CTL_PSB_FREQ) >>
215 RTIT_CTL_PSB_FREQ_OFFSET;
216 if (requested && (!(allowed & BIT(requested))))
217 return false;
218
219 allowed = pt_cap_get(PT_CAP_cycle_thresholds);
220 requested = (config & RTIT_CTL_CYC_THRESH) >>
221 RTIT_CTL_CYC_THRESH_OFFSET;
222 if (requested && (!(allowed & BIT(requested))))
223 return false;
224 }
225
226 if (config & RTIT_CTL_MTC) {
227 /*
228 * In the unlikely case that CPUID lists valid mtc periods,
229 * but not the mtc capability, drop out here.
230 *
231 * Spec says that setting mtc period bits while mtc bit in
232 * CPUID is 0 will #GP, so better safe than sorry.
233 */
234 if (!pt_cap_get(PT_CAP_mtc))
235 return false;
236
237 allowed = pt_cap_get(PT_CAP_mtc_periods);
238 if (!allowed)
239 return false;
240
241 requested = (config & RTIT_CTL_MTC_RANGE) >>
242 RTIT_CTL_MTC_RANGE_OFFSET;
243
244 if (!(allowed & BIT(requested)))
245 return false;
246 }
247
182 return true; 248 return true;
183} 249}
184 250
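
The validation above relies on CPUID handing back a bitmask of supported period/threshold encodings: a requested value n is legal only when bit n of the mask is set. For PSB and CYC a request of 0 is always accepted; for MTC even 0 must appear in the mask. A small restatement (function names are this example's, not the kernel's):

    #include <stdbool.h>
    #include <stdint.h>

    static bool psb_or_cyc_ok(uint32_t allowed, unsigned int requested)
    {
            return requested == 0 || (allowed & (1u << requested));
    }

    static bool mtc_ok(uint32_t allowed, unsigned int requested)
    {
            /* no valid periods at all => MTC cannot be used */
            return allowed && (allowed & (1u << requested));
    }
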
@@ -191,6 +257,11 @@ static void pt_config(struct perf_event *event)
191{ 257{
192 u64 reg; 258 u64 reg;
193 259
260 if (!event->hw.itrace_started) {
261 event->hw.itrace_started = 1;
262 wrmsrl(MSR_IA32_RTIT_STATUS, 0);
263 }
264
194 reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN; 265 reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
195 266
196 if (!event->attr.exclude_kernel) 267 if (!event->attr.exclude_kernel)
@@ -910,7 +981,6 @@ void intel_pt_interrupt(void)
910 981
911 pt_config_buffer(buf->cur->table, buf->cur_idx, 982 pt_config_buffer(buf->cur->table, buf->cur_idx,
912 buf->output_off); 983 buf->output_off);
913 wrmsrl(MSR_IA32_RTIT_STATUS, 0);
914 pt_config(event); 984 pt_config(event);
915 } 985 }
916} 986}
@@ -934,7 +1004,6 @@ static void pt_event_start(struct perf_event *event, int mode)
934 1004
935 pt_config_buffer(buf->cur->table, buf->cur_idx, 1005 pt_config_buffer(buf->cur->table, buf->cur_idx,
936 buf->output_off); 1006 buf->output_off);
937 wrmsrl(MSR_IA32_RTIT_STATUS, 0);
938 pt_config(event); 1007 pt_config(event);
939} 1008}
940 1009
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 5cbd4e64feb5..81431c0f0614 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -86,6 +86,10 @@ static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
86 1<<RAPL_IDX_RAM_NRG_STAT|\ 86 1<<RAPL_IDX_RAM_NRG_STAT|\
87 1<<RAPL_IDX_PP1_NRG_STAT) 87 1<<RAPL_IDX_PP1_NRG_STAT)
88 88
89/* Knights Landing has PKG, RAM */
90#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
91 1<<RAPL_IDX_RAM_NRG_STAT)
92
89/* 93/*
90 * event code: LSB 8 bits, passed in attr->config 94 * event code: LSB 8 bits, passed in attr->config
91 * any other bit is reserved 95 * any other bit is reserved
@@ -486,6 +490,18 @@ static struct attribute *rapl_events_hsw_attr[] = {
486 NULL, 490 NULL,
487}; 491};
488 492
493static struct attribute *rapl_events_knl_attr[] = {
494 EVENT_PTR(rapl_pkg),
495 EVENT_PTR(rapl_ram),
496
497 EVENT_PTR(rapl_pkg_unit),
498 EVENT_PTR(rapl_ram_unit),
499
500 EVENT_PTR(rapl_pkg_scale),
501 EVENT_PTR(rapl_ram_scale),
502 NULL,
503};
504
489static struct attribute_group rapl_pmu_events_group = { 505static struct attribute_group rapl_pmu_events_group = {
490 .name = "events", 506 .name = "events",
491 .attrs = NULL, /* patched at runtime */ 507 .attrs = NULL, /* patched at runtime */
@@ -730,6 +746,11 @@ static int __init rapl_pmu_init(void)
730 rapl_cntr_mask = RAPL_IDX_SRV; 746 rapl_cntr_mask = RAPL_IDX_SRV;
731 rapl_pmu_events_group.attrs = rapl_events_srv_attr; 747 rapl_pmu_events_group.attrs = rapl_events_srv_attr;
732 break; 748 break;
749 case 87: /* Knights Landing */
750 rapl_add_quirk(rapl_hsw_server_quirk);
751 rapl_cntr_mask = RAPL_IDX_KNL;
752 rapl_pmu_events_group.attrs = rapl_events_knl_attr;
753 break;
733 754
734 default: 755 default:
735 /* unsupported */ 756 /* unsupported */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 21b5e38c921b..560e5255b15e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -911,6 +911,9 @@ static int __init uncore_pci_init(void)
911 case 63: /* Haswell-EP */ 911 case 63: /* Haswell-EP */
912 ret = hswep_uncore_pci_init(); 912 ret = hswep_uncore_pci_init();
913 break; 913 break;
914 case 86: /* BDX-DE */
915 ret = bdx_uncore_pci_init();
916 break;
914 case 42: /* Sandy Bridge */ 917 case 42: /* Sandy Bridge */
915 ret = snb_uncore_pci_init(); 918 ret = snb_uncore_pci_init();
916 break; 919 break;
@@ -1209,6 +1212,11 @@ static int __init uncore_cpu_init(void)
1209 break; 1212 break;
1210 case 42: /* Sandy Bridge */ 1213 case 42: /* Sandy Bridge */
1211 case 58: /* Ivy Bridge */ 1214 case 58: /* Ivy Bridge */
1215 case 60: /* Haswell */
1216 case 69: /* Haswell */
1217 case 70: /* Haswell */
1218 case 61: /* Broadwell */
1219 case 71: /* Broadwell */
1212 snb_uncore_cpu_init(); 1220 snb_uncore_cpu_init();
1213 break; 1221 break;
1214 case 45: /* Sandy Bridge-EP */ 1222 case 45: /* Sandy Bridge-EP */
@@ -1224,6 +1232,9 @@ static int __init uncore_cpu_init(void)
1224 case 63: /* Haswell-EP */ 1232 case 63: /* Haswell-EP */
1225 hswep_uncore_cpu_init(); 1233 hswep_uncore_cpu_init();
1226 break; 1234 break;
1235 case 86: /* BDX-DE */
1236 bdx_uncore_cpu_init();
1237 break;
1227 default: 1238 default:
1228 return 0; 1239 return 0;
1229 } 1240 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 0f77f0a196e4..72c54c2e5b1a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -336,6 +336,8 @@ int ivbep_uncore_pci_init(void);
336void ivbep_uncore_cpu_init(void); 336void ivbep_uncore_cpu_init(void);
337int hswep_uncore_pci_init(void); 337int hswep_uncore_pci_init(void);
338void hswep_uncore_cpu_init(void); 338void hswep_uncore_cpu_init(void);
339int bdx_uncore_pci_init(void);
340void bdx_uncore_cpu_init(void);
339 341
340/* perf_event_intel_uncore_nhmex.c */ 342/* perf_event_intel_uncore_nhmex.c */
341void nhmex_uncore_cpu_init(void); 343void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index b005a78c7012..f78574b3cb55 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -45,6 +45,11 @@
45#define SNB_UNC_CBO_0_PER_CTR0 0x706 45#define SNB_UNC_CBO_0_PER_CTR0 0x706
46#define SNB_UNC_CBO_MSR_OFFSET 0x10 46#define SNB_UNC_CBO_MSR_OFFSET 0x10
47 47
48/* SNB ARB register */
49#define SNB_UNC_ARB_PER_CTR0 0x3b0
50#define SNB_UNC_ARB_PERFEVTSEL0 0x3b2
51#define SNB_UNC_ARB_MSR_OFFSET 0x10
52
48/* NHM global control register */ 53/* NHM global control register */
49#define NHM_UNC_PERF_GLOBAL_CTL 0x391 54#define NHM_UNC_PERF_GLOBAL_CTL 0x391
50#define NHM_UNC_FIXED_CTR 0x394 55#define NHM_UNC_FIXED_CTR 0x394
@@ -115,7 +120,7 @@ static struct intel_uncore_ops snb_uncore_msr_ops = {
115 .read_counter = uncore_msr_read_counter, 120 .read_counter = uncore_msr_read_counter,
116}; 121};
117 122
118static struct event_constraint snb_uncore_cbox_constraints[] = { 123static struct event_constraint snb_uncore_arb_constraints[] = {
119 UNCORE_EVENT_CONSTRAINT(0x80, 0x1), 124 UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
120 UNCORE_EVENT_CONSTRAINT(0x83, 0x1), 125 UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
121 EVENT_CONSTRAINT_END 126 EVENT_CONSTRAINT_END
@@ -134,14 +139,28 @@ static struct intel_uncore_type snb_uncore_cbox = {
134 .single_fixed = 1, 139 .single_fixed = 1,
135 .event_mask = SNB_UNC_RAW_EVENT_MASK, 140 .event_mask = SNB_UNC_RAW_EVENT_MASK,
136 .msr_offset = SNB_UNC_CBO_MSR_OFFSET, 141 .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
137 .constraints = snb_uncore_cbox_constraints,
138 .ops = &snb_uncore_msr_ops, 142 .ops = &snb_uncore_msr_ops,
139 .format_group = &snb_uncore_format_group, 143 .format_group = &snb_uncore_format_group,
140 .event_descs = snb_uncore_events, 144 .event_descs = snb_uncore_events,
141}; 145};
142 146
147static struct intel_uncore_type snb_uncore_arb = {
148 .name = "arb",
149 .num_counters = 2,
150 .num_boxes = 1,
151 .perf_ctr_bits = 44,
152 .perf_ctr = SNB_UNC_ARB_PER_CTR0,
153 .event_ctl = SNB_UNC_ARB_PERFEVTSEL0,
154 .event_mask = SNB_UNC_RAW_EVENT_MASK,
155 .msr_offset = SNB_UNC_ARB_MSR_OFFSET,
156 .constraints = snb_uncore_arb_constraints,
157 .ops = &snb_uncore_msr_ops,
158 .format_group = &snb_uncore_format_group,
159};
160
143static struct intel_uncore_type *snb_msr_uncores[] = { 161static struct intel_uncore_type *snb_msr_uncores[] = {
144 &snb_uncore_cbox, 162 &snb_uncore_cbox,
163 &snb_uncore_arb,
145 NULL, 164 NULL,
146}; 165};
147 166
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 6d6e85dd5849..694510a887dc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -2215,7 +2215,7 @@ static struct intel_uncore_type *hswep_pci_uncores[] = {
2215 NULL, 2215 NULL,
2216}; 2216};
2217 2217
2218static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = { 2218static const struct pci_device_id hswep_uncore_pci_ids[] = {
2219 { /* Home Agent 0 */ 2219 { /* Home Agent 0 */
2220 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30), 2220 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
2221 .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0), 2221 .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
@@ -2321,3 +2321,167 @@ int hswep_uncore_pci_init(void)
2321 return 0; 2321 return 0;
2322} 2322}
2323/* end of Haswell-EP uncore support */ 2323/* end of Haswell-EP uncore support */
2324
2325/* BDX-DE uncore support */
2326
2327static struct intel_uncore_type bdx_uncore_ubox = {
2328 .name = "ubox",
2329 .num_counters = 2,
2330 .num_boxes = 1,
2331 .perf_ctr_bits = 48,
2332 .fixed_ctr_bits = 48,
2333 .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
2334 .event_ctl = HSWEP_U_MSR_PMON_CTL0,
2335 .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
2336 .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
2337 .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
2338 .num_shared_regs = 1,
2339 .ops = &ivbep_uncore_msr_ops,
2340 .format_group = &ivbep_uncore_ubox_format_group,
2341};
2342
2343static struct event_constraint bdx_uncore_cbox_constraints[] = {
2344 UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
2345 UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
2346 UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
2347 EVENT_CONSTRAINT_END
2348};
2349
2350static struct intel_uncore_type bdx_uncore_cbox = {
2351 .name = "cbox",
2352 .num_counters = 4,
2353 .num_boxes = 8,
2354 .perf_ctr_bits = 48,
2355 .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
2356 .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
2357 .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
2358 .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
2359 .msr_offset = HSWEP_CBO_MSR_OFFSET,
2360 .num_shared_regs = 1,
2361 .constraints = bdx_uncore_cbox_constraints,
2362 .ops = &hswep_uncore_cbox_ops,
2363 .format_group = &hswep_uncore_cbox_format_group,
2364};
2365
2366static struct intel_uncore_type *bdx_msr_uncores[] = {
2367 &bdx_uncore_ubox,
2368 &bdx_uncore_cbox,
2369 &hswep_uncore_pcu,
2370 NULL,
2371};
2372
2373void bdx_uncore_cpu_init(void)
2374{
2375 if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
2376 bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
2377 uncore_msr_uncores = bdx_msr_uncores;
2378}
2379
2380static struct intel_uncore_type bdx_uncore_ha = {
2381 .name = "ha",
2382 .num_counters = 4,
2383 .num_boxes = 1,
2384 .perf_ctr_bits = 48,
2385 SNBEP_UNCORE_PCI_COMMON_INIT(),
2386};
2387
2388static struct intel_uncore_type bdx_uncore_imc = {
2389 .name = "imc",
2390 .num_counters = 5,
2391 .num_boxes = 2,
2392 .perf_ctr_bits = 48,
2393 .fixed_ctr_bits = 48,
2394 .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
2395 .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
2396 .event_descs = hswep_uncore_imc_events,
2397 SNBEP_UNCORE_PCI_COMMON_INIT(),
2398};
2399
2400static struct intel_uncore_type bdx_uncore_irp = {
2401 .name = "irp",
2402 .num_counters = 4,
2403 .num_boxes = 1,
2404 .perf_ctr_bits = 48,
2405 .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
2406 .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
2407 .ops = &hswep_uncore_irp_ops,
2408 .format_group = &snbep_uncore_format_group,
2409};
2410
2411
2412static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
2413 UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
2414 UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
2415 UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
2416 UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
2417 UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
2418 UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
2419 UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
2420 EVENT_CONSTRAINT_END
2421};
2422
2423static struct intel_uncore_type bdx_uncore_r2pcie = {
2424 .name = "r2pcie",
2425 .num_counters = 4,
2426 .num_boxes = 1,
2427 .perf_ctr_bits = 48,
2428 .constraints = bdx_uncore_r2pcie_constraints,
2429 SNBEP_UNCORE_PCI_COMMON_INIT(),
2430};
2431
2432enum {
2433 BDX_PCI_UNCORE_HA,
2434 BDX_PCI_UNCORE_IMC,
2435 BDX_PCI_UNCORE_IRP,
2436 BDX_PCI_UNCORE_R2PCIE,
2437};
2438
2439static struct intel_uncore_type *bdx_pci_uncores[] = {
2440 [BDX_PCI_UNCORE_HA] = &bdx_uncore_ha,
2441 [BDX_PCI_UNCORE_IMC] = &bdx_uncore_imc,
2442 [BDX_PCI_UNCORE_IRP] = &bdx_uncore_irp,
2443 [BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
2444 NULL,
2445};
2446
2447static const struct pci_device_id bdx_uncore_pci_ids[] = {
2448 { /* Home Agent 0 */
2449 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
2450 .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
2451 },
2452 { /* MC0 Channel 0 */
2453 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
2454 .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
2455 },
2456 { /* MC0 Channel 1 */
2457 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
2458 .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
2459 },
2460 { /* IRP */
2461 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
2462 .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
2463 },
2464 { /* R2PCIe */
2465 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
2466 .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
2467 },
2468 { /* end: all zeroes */ }
2469};
2470
2471static struct pci_driver bdx_uncore_pci_driver = {
2472 .name = "bdx_uncore",
2473 .id_table = bdx_uncore_pci_ids,
2474};
2475
2476int bdx_uncore_pci_init(void)
2477{
2478 int ret = snbep_pci2phy_map_init(0x6f1e);
2479
2480 if (ret)
2481 return ret;
2482 uncore_pci_uncores = bdx_pci_uncores;
2483 uncore_pci_driver = &bdx_uncore_pci_driver;
2484 return 0;
2485}
2486
2487/* end of BDX-DE uncore support */
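
Each PCI id in the table above carries an UNCORE_PCI_DEV_DATA(type, idx) cookie so the probe path can recover which uncore type and box a device instance belongs to. Assuming the usual ((type) << 8 | (idx)) packing, the unpack side looks like:

    #include <stdint.h>

    #define UNCORE_PCI_DEV_DATA(type, idx) (((type) << 8) | (idx))

    static inline unsigned int uncore_pci_type(uint64_t driver_data)
    {
            return (driver_data >> 8) & 0xff;
    }

    static inline unsigned int uncore_pci_box(uint64_t driver_data)
    {
            return driver_data & 0xff;
    }

    /* e.g. UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1) => IMC type, box 1 */
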
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
new file mode 100644
index 000000000000..086b12eae794
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_msr.c
@@ -0,0 +1,242 @@
1#include <linux/perf_event.h>
2
3enum perf_msr_id {
4 PERF_MSR_TSC = 0,
5 PERF_MSR_APERF = 1,
6 PERF_MSR_MPERF = 2,
7 PERF_MSR_PPERF = 3,
8 PERF_MSR_SMI = 4,
9
10 PERF_MSR_EVENT_MAX,
11};
12
13bool test_aperfmperf(int idx)
14{
15 return boot_cpu_has(X86_FEATURE_APERFMPERF);
16}
17
18bool test_intel(int idx)
19{
20 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
21 boot_cpu_data.x86 != 6)
22 return false;
23
24 switch (boot_cpu_data.x86_model) {
25 case 30: /* 45nm Nehalem */
26 case 26: /* 45nm Nehalem-EP */
27 case 46: /* 45nm Nehalem-EX */
28
29 case 37: /* 32nm Westmere */
30 case 44: /* 32nm Westmere-EP */
31 case 47: /* 32nm Westmere-EX */
32
33 case 42: /* 32nm SandyBridge */
34 case 45: /* 32nm SandyBridge-E/EN/EP */
35
36 case 58: /* 22nm IvyBridge */
37 case 62: /* 22nm IvyBridge-EP/EX */
38
39 case 60: /* 22nm Haswell Core */
40 case 63: /* 22nm Haswell Server */
41 case 69: /* 22nm Haswell ULT */
42 case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
43
44 case 61: /* 14nm Broadwell Core-M */
45 case 86: /* 14nm Broadwell Xeon D */
46 case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
47 case 79: /* 14nm Broadwell Server */
48
49 case 55: /* 22nm Atom "Silvermont" */
50 case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
51 case 76: /* 14nm Atom "Airmont" */
52 if (idx == PERF_MSR_SMI)
53 return true;
54 break;
55
56 case 78: /* 14nm Skylake Mobile */
57 case 94: /* 14nm Skylake Desktop */
58 if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
59 return true;
60 break;
61 }
62
63 return false;
64}
65
66struct perf_msr {
67 u64 msr;
68 struct perf_pmu_events_attr *attr;
69 bool (*test)(int idx);
70};
71
72PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
73PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
74PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
75PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
76PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
77
78static struct perf_msr msr[] = {
79 [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
80 [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
81 [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
82 [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
83 [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
84};
85
86static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
87 NULL,
88};
89
90static struct attribute_group events_attr_group = {
91 .name = "events",
92 .attrs = events_attrs,
93};
94
95PMU_FORMAT_ATTR(event, "config:0-63");
96static struct attribute *format_attrs[] = {
97 &format_attr_event.attr,
98 NULL,
99};
100static struct attribute_group format_attr_group = {
101 .name = "format",
102 .attrs = format_attrs,
103};
104
105static const struct attribute_group *attr_groups[] = {
106 &events_attr_group,
107 &format_attr_group,
108 NULL,
109};
110
111static int msr_event_init(struct perf_event *event)
112{
113 u64 cfg = event->attr.config;
114
115 if (event->attr.type != event->pmu->type)
116 return -ENOENT;
117
118 if (cfg >= PERF_MSR_EVENT_MAX)
119 return -EINVAL;
120
121 /* unsupported modes and filters */
122 if (event->attr.exclude_user ||
123 event->attr.exclude_kernel ||
124 event->attr.exclude_hv ||
125 event->attr.exclude_idle ||
126 event->attr.exclude_host ||
127 event->attr.exclude_guest ||
128 event->attr.sample_period) /* no sampling */
129 return -EINVAL;
130
131 if (!msr[cfg].attr)
132 return -EINVAL;
133
134 event->hw.idx = -1;
135 event->hw.event_base = msr[cfg].msr;
136 event->hw.config = cfg;
137
138 return 0;
139}
140
141static inline u64 msr_read_counter(struct perf_event *event)
142{
143 u64 now;
144
145 if (event->hw.event_base)
146 rdmsrl(event->hw.event_base, now);
147 else
148 rdtscll(now);
149
150 return now;
151}
152static void msr_event_update(struct perf_event *event)
153{
154 u64 prev, now;
155 s64 delta;
156
157 /* Careful, an NMI might modify the previous event value. */
158again:
159 prev = local64_read(&event->hw.prev_count);
160 now = msr_read_counter(event);
161
162 if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
163 goto again;
164
165 delta = now - prev;
166 if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
167 delta <<= 32;
168 delta >>= 32; /* sign extend */
169 }
170 local64_add(delta, &event->count);
171}
172
173static void msr_event_start(struct perf_event *event, int flags)
174{
175 u64 now;
176
177 now = msr_read_counter(event);
178 local64_set(&event->hw.prev_count, now);
179}
180
181static void msr_event_stop(struct perf_event *event, int flags)
182{
183 msr_event_update(event);
184}
185
186static void msr_event_del(struct perf_event *event, int flags)
187{
188 msr_event_stop(event, PERF_EF_UPDATE);
189}
190
191static int msr_event_add(struct perf_event *event, int flags)
192{
193 if (flags & PERF_EF_START)
194 msr_event_start(event, flags);
195
196 return 0;
197}
198
199static struct pmu pmu_msr = {
200 .task_ctx_nr = perf_sw_context,
201 .attr_groups = attr_groups,
202 .event_init = msr_event_init,
203 .add = msr_event_add,
204 .del = msr_event_del,
205 .start = msr_event_start,
206 .stop = msr_event_stop,
207 .read = msr_event_update,
208 .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
209};
210
211static int __init msr_init(void)
212{
213 int i, j = 0;
214
215 if (!boot_cpu_has(X86_FEATURE_TSC)) {
216 pr_cont("no MSR PMU driver.\n");
217 return 0;
218 }
219
220 /* Probe the MSRs. */
221 for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
222 u64 val;
223
224 /*
225 * Virt sucks arse; you cannot tell if an R/O MSR is present :/
226 */
227 if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
228 msr[i].attr = NULL;
229 }
230
231 /* List remaining MSRs in the sysfs attrs. */
232 for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
233 if (msr[i].attr)
234 events_attrs[j++] = &msr[i].attr->attr.attr;
235 }
236 events_attrs[j] = NULL;
237
238 perf_pmu_register(&pmu_msr, "msr", -1);
239
240 return 0;
241}
242device_initcall(msr_init);
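
One subtlety in msr_event_update(): MSR_SMI_COUNT is a 32-bit counter read into a 64-bit value, so the delta is sign-extended from bit 31 to survive wraparound before being accumulated. A userspace restatement (the cast form is equivalent to the << 32 / >> 32 pair without shifting a negative value):

    #include <stdint.h>

    static int64_t smi_delta(uint64_t prev, uint64_t now)
    {
            /* only the low 32 bits carry real count; sign-extend the delta */
            return (int32_t)(now - prev);
    }

    /* e.g. a wrap from 0xffffffff to 0 yields +1, not -(2^32 - 1) */

Once registered, the events appear under the "msr" PMU, e.g. perf stat -e msr/tsc/,msr/smi/ -a sleep 1 (on a CPU that passes the probes above).
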
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 7114ba220fd4..50a3fad5b89f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -32,6 +32,7 @@
32#include <linux/irqflags.h> 32#include <linux/irqflags.h>
33#include <linux/notifier.h> 33#include <linux/notifier.h>
34#include <linux/kallsyms.h> 34#include <linux/kallsyms.h>
35#include <linux/kprobes.h>
35#include <linux/percpu.h> 36#include <linux/percpu.h>
36#include <linux/kdebug.h> 37#include <linux/kdebug.h>
37#include <linux/kernel.h> 38#include <linux/kernel.h>
@@ -179,7 +180,11 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
179 va = info->address; 180 va = info->address;
180 len = bp->attr.bp_len; 181 len = bp->attr.bp_len;
181 182
182 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); 183 /*
184 * We don't need to worry about va + len - 1 overflowing:
185 * we already require that va is aligned to a multiple of len.
186 */
187 return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
183} 188}
184 189
185int arch_bp_generic_fields(int x86_len, int x86_type, 190int arch_bp_generic_fields(int x86_len, int x86_type,
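
With bp_len validated to be a power of two and the address aligned to it (see the range-breakpoint checks later in this function), va + len - 1 cannot wrap, so the two endpoint tests agree and the || merely errs toward classifying a breakpoint as kernel. A userspace restatement, with an illustrative x86-64 TASK_SIZE_MAX:

    #include <stdbool.h>
    #include <stdint.h>

    #define TASK_SIZE_MAX_SKETCH 0x00007ffffffff000ULL /* illustrative */

    static bool bp_in_kernelspace(uint64_t va, uint64_t len)
    {
            /* va is len-aligned, so va + len - 1 cannot overflow */
            return va >= TASK_SIZE_MAX_SKETCH ||
                   (va + len - 1) >= TASK_SIZE_MAX_SKETCH;
    }
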
@@ -243,6 +248,20 @@ static int arch_build_bp_info(struct perf_event *bp)
243 info->type = X86_BREAKPOINT_RW; 248 info->type = X86_BREAKPOINT_RW;
244 break; 249 break;
245 case HW_BREAKPOINT_X: 250 case HW_BREAKPOINT_X:
251 /*
252 * We don't allow kernel breakpoints in places that are not
253 * acceptable for kprobes. On non-kprobes kernels, we don't
254 * allow kernel breakpoints at all.
255 */
256 if (bp->attr.bp_addr >= TASK_SIZE_MAX) {
257#ifdef CONFIG_KPROBES
258 if (within_kprobe_blacklist(bp->attr.bp_addr))
259 return -EINVAL;
260#else
261 return -EINVAL;
262#endif
263 }
264
246 info->type = X86_BREAKPOINT_EXECUTE; 265 info->type = X86_BREAKPOINT_EXECUTE;
247 /* 266 /*
248 * x86 inst breakpoints need to have a specific undefined len. 267 * x86 inst breakpoints need to have a specific undefined len.
@@ -276,8 +295,18 @@ static int arch_build_bp_info(struct perf_event *bp)
276 break; 295 break;
277#endif 296#endif
278 default: 297 default:
298 /* AMD range breakpoint */
279 if (!is_power_of_2(bp->attr.bp_len)) 299 if (!is_power_of_2(bp->attr.bp_len))
280 return -EINVAL; 300 return -EINVAL;
301 if (bp->attr.bp_addr & (bp->attr.bp_len - 1))
302 return -EINVAL;
303 /*
304 * It's impossible to use a range breakpoint to fake out
305 * user vs kernel detection because bp_len - 1 can't
306 * have the high bit set. If we ever allow range instruction
307 * breakpoints, then we'll have to check for kprobe-blacklisted
308 * addresses anywhere in the range.
309 */
281 if (!cpu_has_bpext) 310 if (!cpu_has_bpext)
282 return -EOPNOTSUPP; 311 return -EOPNOTSUPP;
283 info->mask = bp->attr.bp_len - 1; 312 info->mask = bp->attr.bp_len - 1;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7437b41f6a47..88e9a38c71a5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -296,6 +296,14 @@ u64 native_sched_clock(void)
296 return cycles_2_ns(tsc_now); 296 return cycles_2_ns(tsc_now);
297} 297}
298 298
299/*
300 * Generate a sched_clock if you already have a TSC value.
301 */
302u64 native_sched_clock_from_tsc(u64 tsc)
303{
304 return cycles_2_ns(tsc);
305}
306
299/* We need to define a real function for sched_clock, to override the 307/* We need to define a real function for sched_clock, to override the
300 weak default version */ 308 weak default version */
301#ifdef CONFIG_PARAVIRT 309#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 66476244731e..bf4db6eaec8f 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -985,3 +985,12 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
985 985
986 return -1; 986 return -1;
987} 987}
988
989bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
990 struct pt_regs *regs)
991{
992 if (ctx == RP_CHECK_CALL) /* sp was just decremented by "call" insn */
993 return regs->sp < ret->stack;
994 else
995 return regs->sp <= ret->stack;
996}
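
The asymmetry between the two branches is deliberate: at RP_CHECK_CALL time the "call" insn has already pushed a return address, so even a live instance sees sp strictly below its recorded stack value; for the other checks, equality still means alive. Reduced to its comparison:

    #include <stdbool.h>

    /* ri_stack: sp recorded when the return_instance was created */
    static bool uretprobe_alive(unsigned long sp, unsigned long ri_stack,
                                bool at_call)
    {
            return at_call ? sp < ri_stack   /* call already pushed */
                           : sp <= ri_stack;
    }
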
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1ab54754a86d..8f6849084248 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -267,6 +267,8 @@ extern void show_registers(struct pt_regs *regs);
267extern void kprobes_inc_nmissed_count(struct kprobe *p); 267extern void kprobes_inc_nmissed_count(struct kprobe *p);
268extern bool arch_within_kprobe_blacklist(unsigned long addr); 268extern bool arch_within_kprobe_blacklist(unsigned long addr);
269 269
270extern bool within_kprobe_blacklist(unsigned long addr);
271
270struct kprobe_insn_cache { 272struct kprobe_insn_cache {
271 struct mutex mutex; 273 struct mutex mutex;
272 void *(*alloc)(void); /* allocate insn page */ 274 void *(*alloc)(void); /* allocate insn page */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 1063c850dbab..ed27917cabc9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -243,6 +243,7 @@ enum {
243 TRACE_EVENT_FL_USE_CALL_FILTER_BIT, 243 TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
244 TRACE_EVENT_FL_TRACEPOINT_BIT, 244 TRACE_EVENT_FL_TRACEPOINT_BIT,
245 TRACE_EVENT_FL_KPROBE_BIT, 245 TRACE_EVENT_FL_KPROBE_BIT,
246 TRACE_EVENT_FL_UPROBE_BIT,
246}; 247};
247 248
248/* 249/*
@@ -257,6 +258,7 @@ enum {
257 * USE_CALL_FILTER - For trace internal events, don't use file filter 258 * USE_CALL_FILTER - For trace internal events, don't use file filter
258 * TRACEPOINT - Event is a tracepoint 259 * TRACEPOINT - Event is a tracepoint
259 * KPROBE - Event is a kprobe 260 * KPROBE - Event is a kprobe
261 * UPROBE - Event is a uprobe
260 */ 262 */
261enum { 263enum {
262 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), 264 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -267,8 +269,11 @@ enum {
267 TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), 269 TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
268 TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), 270 TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
269 TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), 271 TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT),
272 TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT),
270}; 273};
271 274
275#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
276
272struct trace_event_call { 277struct trace_event_call {
273 struct list_head list; 278 struct list_head list;
274 struct trace_event_class *class; 279 struct trace_event_class *class;
@@ -542,7 +547,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
542 event_triggers_post_call(file, tt); 547 event_triggers_post_call(file, tt);
543} 548}
544 549
545#ifdef CONFIG_BPF_SYSCALL 550#ifdef CONFIG_BPF_EVENTS
546unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); 551unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
547#else 552#else
548static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) 553static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 60beb5dc7977..0bdc72f36905 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -92,6 +92,22 @@ struct uprobe_task {
92 unsigned int depth; 92 unsigned int depth;
93}; 93};
94 94
95struct return_instance {
96 struct uprobe *uprobe;
97 unsigned long func;
98 unsigned long stack; /* stack pointer */
99 unsigned long orig_ret_vaddr; /* original return address */
100 bool chained; /* true, if instance is nested */
101
102 struct return_instance *next; /* keep as stack */
103};
104
105enum rp_check {
106 RP_CHECK_CALL,
107 RP_CHECK_CHAIN_CALL,
108 RP_CHECK_RET,
109};
110
95struct xol_area; 111struct xol_area;
96 112
97struct uprobes_state { 113struct uprobes_state {
@@ -128,6 +144,7 @@ extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
128extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); 144extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
129extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); 145extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
130extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); 146extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
147extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs);
131extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); 148extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
132extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, 149extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
133 void *src, unsigned long len); 150 void *src, unsigned long len);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d97f84c080da..2881145cda86 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -330,7 +330,8 @@ struct perf_event_attr {
330 mmap2 : 1, /* include mmap with inode data */ 330 mmap2 : 1, /* include mmap with inode data */
331 comm_exec : 1, /* flag comm events that are due to an exec */ 331 comm_exec : 1, /* flag comm events that are due to an exec */
332 use_clockid : 1, /* use @clockid for time fields */ 332 use_clockid : 1, /* use @clockid for time fields */
333 __reserved_1 : 38; 333 context_switch : 1, /* context switch data */
334 __reserved_1 : 37;
334 335
335 union { 336 union {
336 __u32 wakeup_events; /* wakeup every n events */ 337 __u32 wakeup_events; /* wakeup every n events */
@@ -572,9 +573,11 @@ struct perf_event_mmap_page {
572/* 573/*
573 * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on 574 * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
574 * different events so can reuse the same bit position. 575 * different events so can reuse the same bit position.
576 * Ditto PERF_RECORD_MISC_SWITCH_OUT.
575 */ 577 */
576#define PERF_RECORD_MISC_MMAP_DATA (1 << 13) 578#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
577#define PERF_RECORD_MISC_COMM_EXEC (1 << 13) 579#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
580#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
578/* 581/*
579 * Indicates that the content of PERF_SAMPLE_IP points to 582 * Indicates that the content of PERF_SAMPLE_IP points to
580 * the actual instruction that triggered the event. See also 583 * the actual instruction that triggered the event. See also
@@ -818,6 +821,32 @@ enum perf_event_type {
818 */ 821 */
819 PERF_RECORD_LOST_SAMPLES = 13, 822 PERF_RECORD_LOST_SAMPLES = 13,
820 823
824 /*
825 * Records a context switch in or out (flagged by
826 * PERF_RECORD_MISC_SWITCH_OUT). See also
827 * PERF_RECORD_SWITCH_CPU_WIDE.
828 *
829 * struct {
830 * struct perf_event_header header;
831 * struct sample_id sample_id;
832 * };
833 */
834 PERF_RECORD_SWITCH = 14,
835
836 /*
837 * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and
838 * next_prev_tid that are the next (switching out) or previous
839 * (switching in) pid/tid.
840 *
841 * struct {
842 * struct perf_event_header header;
843 * u32 next_prev_pid;
844 * u32 next_prev_tid;
845 * struct sample_id sample_id;
846 * };
847 */
848 PERF_RECORD_SWITCH_CPU_WIDE = 15,
849
821 PERF_RECORD_MAX, /* non-ABI */ 850 PERF_RECORD_MAX, /* non-ABI */
822}; 851};
823 852
@@ -922,6 +951,7 @@ union perf_mem_data_src {
922 * 951 *
923 * in_tx: running in a hardware transaction 952 * in_tx: running in a hardware transaction
924 * abort: aborting a hardware transaction 953 * abort: aborting a hardware transaction
954 * cycles: cycles from last branch (or 0 if not supported)
925 */ 955 */
926struct perf_branch_entry { 956struct perf_branch_entry {
927 __u64 from; 957 __u64 from;
@@ -930,7 +960,8 @@ struct perf_branch_entry {
930 predicted:1,/* target predicted */ 960 predicted:1,/* target predicted */
931 in_tx:1, /* in transaction */ 961 in_tx:1, /* in transaction */
932 abort:1, /* transaction abort */ 962 abort:1, /* transaction abort */
933 reserved:60; 963 cycles:16, /* cycle count to last branch */
964 reserved:44;
934}; 965};
935 966
936#endif /* _UAPI_LINUX_PERF_EVENT_H */ 967#endif /* _UAPI_LINUX_PERF_EVENT_H */
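
A consumer walking the mmap'ed ring buffer tells the two records apart by header.type and the direction by the shared misc bit; the pid/tid pair exists only in the CPU-wide flavor. A hedged userspace sketch (struct and macro names are local to the example):

    #include <stdint.h>
    #include <stdio.h>

    struct rec_header { uint32_t type; uint16_t misc; uint16_t size; };

    #define REC_SWITCH          14        /* PERF_RECORD_SWITCH */
    #define REC_SWITCH_CPU_WIDE 15        /* PERF_RECORD_SWITCH_CPU_WIDE */
    #define MISC_SWITCH_OUT     (1 << 13) /* PERF_RECORD_MISC_SWITCH_OUT */

    static void handle_switch(const struct rec_header *hdr)
    {
            const char *dir = (hdr->misc & MISC_SWITCH_OUT) ? "out" : "in";

            if (hdr->type == REC_SWITCH_CPU_WIDE) {
                    const uint32_t *body = (const uint32_t *)(hdr + 1);

                    /* next (switching out) or previous (in) pid/tid */
                    printf("switch %s %u/%u\n", dir,
                           (unsigned int)body[0], (unsigned int)body[1]);
            } else if (hdr->type == REC_SWITCH) {
                    printf("switch %s\n", dir); /* only sample_id follows */
            }
    }
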
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e6feb5114134..ae16867670a9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -163,6 +163,7 @@ static atomic_t nr_mmap_events __read_mostly;
163static atomic_t nr_comm_events __read_mostly; 163static atomic_t nr_comm_events __read_mostly;
164static atomic_t nr_task_events __read_mostly; 164static atomic_t nr_task_events __read_mostly;
165static atomic_t nr_freq_events __read_mostly; 165static atomic_t nr_freq_events __read_mostly;
166static atomic_t nr_switch_events __read_mostly;
166 167
167static LIST_HEAD(pmus); 168static LIST_HEAD(pmus);
168static DEFINE_MUTEX(pmus_lock); 169static DEFINE_MUTEX(pmus_lock);
@@ -2619,6 +2620,9 @@ static void perf_pmu_sched_task(struct task_struct *prev,
2619 local_irq_restore(flags); 2620 local_irq_restore(flags);
2620} 2621}
2621 2622
2623static void perf_event_switch(struct task_struct *task,
2624 struct task_struct *next_prev, bool sched_in);
2625
2622#define for_each_task_context_nr(ctxn) \ 2626#define for_each_task_context_nr(ctxn) \
2623 for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) 2627 for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
2624 2628
@@ -2641,6 +2645,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
2641 if (__this_cpu_read(perf_sched_cb_usages)) 2645 if (__this_cpu_read(perf_sched_cb_usages))
2642 perf_pmu_sched_task(task, next, false); 2646 perf_pmu_sched_task(task, next, false);
2643 2647
2648 if (atomic_read(&nr_switch_events))
2649 perf_event_switch(task, next, false);
2650
2644 for_each_task_context_nr(ctxn) 2651 for_each_task_context_nr(ctxn)
2645 perf_event_context_sched_out(task, ctxn, next); 2652 perf_event_context_sched_out(task, ctxn, next);
2646 2653
@@ -2831,6 +2838,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
2831 if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) 2838 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
2832 perf_cgroup_sched_in(prev, task); 2839 perf_cgroup_sched_in(prev, task);
2833 2840
2841 if (atomic_read(&nr_switch_events))
2842 perf_event_switch(task, prev, true);
2843
2834 if (__this_cpu_read(perf_sched_cb_usages)) 2844 if (__this_cpu_read(perf_sched_cb_usages))
2835 perf_pmu_sched_task(prev, task, true); 2845 perf_pmu_sched_task(prev, task, true);
2836} 2846}
@@ -3454,6 +3464,10 @@ static void unaccount_event(struct perf_event *event)
3454 atomic_dec(&nr_task_events); 3464 atomic_dec(&nr_task_events);
3455 if (event->attr.freq) 3465 if (event->attr.freq)
3456 atomic_dec(&nr_freq_events); 3466 atomic_dec(&nr_freq_events);
3467 if (event->attr.context_switch) {
3468 static_key_slow_dec_deferred(&perf_sched_events);
3469 atomic_dec(&nr_switch_events);
3470 }
3457 if (is_cgroup_event(event)) 3471 if (is_cgroup_event(event))
3458 static_key_slow_dec_deferred(&perf_sched_events); 3472 static_key_slow_dec_deferred(&perf_sched_events);
3459 if (has_branch_stack(event)) 3473 if (has_branch_stack(event))
@@ -6025,6 +6039,91 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost)
6025} 6039}
6026 6040
6027/* 6041/*
6042 * context_switch tracking
6043 */
6044
6045struct perf_switch_event {
6046 struct task_struct *task;
6047 struct task_struct *next_prev;
6048
6049 struct {
6050 struct perf_event_header header;
6051 u32 next_prev_pid;
6052 u32 next_prev_tid;
6053 } event_id;
6054};
6055
6056static int perf_event_switch_match(struct perf_event *event)
6057{
6058 return event->attr.context_switch;
6059}
6060
6061static void perf_event_switch_output(struct perf_event *event, void *data)
6062{
6063 struct perf_switch_event *se = data;
6064 struct perf_output_handle handle;
6065 struct perf_sample_data sample;
6066 int ret;
6067
6068 if (!perf_event_switch_match(event))
6069 return;
6070
6071 /* Only CPU-wide events are allowed to see next/prev pid/tid */
6072 if (event->ctx->task) {
6073 se->event_id.header.type = PERF_RECORD_SWITCH;
6074 se->event_id.header.size = sizeof(se->event_id.header);
6075 } else {
6076 se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE;
6077 se->event_id.header.size = sizeof(se->event_id);
6078 se->event_id.next_prev_pid =
6079 perf_event_pid(event, se->next_prev);
6080 se->event_id.next_prev_tid =
6081 perf_event_tid(event, se->next_prev);
6082 }
6083
6084 perf_event_header__init_id(&se->event_id.header, &sample, event);
6085
6086 ret = perf_output_begin(&handle, event, se->event_id.header.size);
6087 if (ret)
6088 return;
6089
6090 if (event->ctx->task)
6091 perf_output_put(&handle, se->event_id.header);
6092 else
6093 perf_output_put(&handle, se->event_id);
6094
6095 perf_event__output_id_sample(event, &handle, &sample);
6096
6097 perf_output_end(&handle);
6098}
6099
6100static void perf_event_switch(struct task_struct *task,
6101 struct task_struct *next_prev, bool sched_in)
6102{
6103 struct perf_switch_event switch_event;
6104
6105 /* N.B. caller checks nr_switch_events != 0 */
6106
6107 switch_event = (struct perf_switch_event){
6108 .task = task,
6109 .next_prev = next_prev,
6110 .event_id = {
6111 .header = {
6112 /* .type */
6113 .misc = sched_in ? 0 : PERF_RECORD_MISC_SWITCH_OUT,
6114 /* .size */
6115 },
6116 /* .next_prev_pid */
6117 /* .next_prev_tid */
6118 },
6119 };
6120
6121 perf_event_aux(perf_event_switch_output,
6122 &switch_event,
6123 NULL);
6124}
6125
6126/*
6028 * IRQ throttle logging 6127 * IRQ throttle logging
6029 */ 6128 */
6030 6129
@@ -6083,8 +6182,6 @@ static void perf_log_itrace_start(struct perf_event *event)
6083 event->hw.itrace_started) 6182 event->hw.itrace_started)
6084 return; 6183 return;
6085 6184
6086 event->hw.itrace_started = 1;
6087
6088 rec.header.type = PERF_RECORD_ITRACE_START; 6185 rec.header.type = PERF_RECORD_ITRACE_START;
6089 rec.header.misc = 0; 6186 rec.header.misc = 0;
6090 rec.header.size = sizeof(rec); 6187 rec.header.size = sizeof(rec);
@@ -6792,8 +6889,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
6792 if (event->tp_event->prog) 6889 if (event->tp_event->prog)
6793 return -EEXIST; 6890 return -EEXIST;
6794 6891
6795 if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) 6892 if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
6796 /* bpf programs can only be attached to kprobes */ 6893 /* bpf programs can only be attached to u/kprobes */
6797 return -EINVAL; 6894 return -EINVAL;
6798 6895
6799 prog = bpf_prog_get(prog_fd); 6896 prog = bpf_prog_get(prog_fd);
@@ -7522,6 +7619,10 @@ static void account_event(struct perf_event *event)
7522 if (atomic_inc_return(&nr_freq_events) == 1) 7619 if (atomic_inc_return(&nr_freq_events) == 1)
7523 tick_nohz_full_kick_all(); 7620 tick_nohz_full_kick_all();
7524 } 7621 }
7622 if (event->attr.context_switch) {
7623 atomic_inc(&nr_switch_events);
7624 static_key_slow_inc(&perf_sched_events.key);
7625 }
7525 if (has_branch_stack(event)) 7626 if (has_branch_stack(event))
7526 static_key_slow_inc(&perf_sched_events.key); 7627 static_key_slow_inc(&perf_sched_events.key);
7527 if (is_cgroup_event(event)) 7628 if (is_cgroup_event(event))
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index c8aa3f75bc4d..182bc30899d5 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -437,7 +437,10 @@ static struct page *rb_alloc_aux_page(int node, int order)
437 437
438 if (page && order) { 438 if (page && order) {
439 /* 439 /*
440 * Communicate the allocation size to the driver 440 * Communicate the allocation size to the driver:
441 * if we managed to secure a high-order allocation,
442 * set its first page's private to this order;
443 * !PagePrivate(page) means it's just a normal page.
441 */ 444 */
442 split_page(page, order); 445 split_page(page, order);
443 SetPagePrivate(page); 446 SetPagePrivate(page);
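
The expanded comment spells out the contract: after split_page(), only the first page of a high-order AUX allocation carries the order in page->private, and PagePrivate() is what distinguishes it from a plain order-0 page. A userspace analog of the consumer side (the struct page stand-ins are this example's):

    #include <stdbool.h>
    #include <stddef.h>

    /* stand-ins for PagePrivate() / page_private() on struct page */
    struct page_sketch {
            bool has_private;
            unsigned int order;
    };

    static size_t aux_block_pages(const struct page_sketch *first)
    {
            /* !PagePrivate(page) => a normal, single page */
            return first->has_private ? (size_t)1 << first->order : 1;
    }
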
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index cb346f26a22d..4e5e9798aa0c 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -86,15 +86,6 @@ struct uprobe {
86 struct arch_uprobe arch; 86 struct arch_uprobe arch;
87}; 87};
88 88
89struct return_instance {
90 struct uprobe *uprobe;
91 unsigned long func;
92 unsigned long orig_ret_vaddr; /* original return address */
93 bool chained; /* true, if instance is nested */
94
95 struct return_instance *next; /* keep as stack */
96};
97
98/* 89/*
99 * Execute out of line area: anonymous executable mapping installed 90 * Execute out of line area: anonymous executable mapping installed
100 * by the probed task to execute the copy of the original instruction 91 * by the probed task to execute the copy of the original instruction
@@ -105,17 +96,18 @@ struct return_instance {
105 * allocated. 96 * allocated.
106 */ 97 */
107struct xol_area { 98struct xol_area {
108 wait_queue_head_t wq; /* if all slots are busy */ 99 wait_queue_head_t wq; /* if all slots are busy */
109 atomic_t slot_count; /* number of in-use slots */ 100 atomic_t slot_count; /* number of in-use slots */
110 unsigned long *bitmap; /* 0 = free slot */ 101 unsigned long *bitmap; /* 0 = free slot */
111 struct page *page;
112 102
103 struct vm_special_mapping xol_mapping;
104 struct page *pages[2];
113 /* 105 /*
114 * We keep the vma's vm_start rather than a pointer to the vma 106 * We keep the vma's vm_start rather than a pointer to the vma
115 * itself. The probed process or a naughty kernel module could make 107 * itself. The probed process or a naughty kernel module could make
116 * the vma go away, and we must handle that reasonably gracefully. 108 * the vma go away, and we must handle that reasonably gracefully.
117 */ 109 */
118 unsigned long vaddr; /* Page(s) of instruction slots */ 110 unsigned long vaddr; /* Page(s) of instruction slots */
119}; 111};
120 112
121/* 113/*
@@ -366,6 +358,18 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
366 return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn); 358 return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn);
367} 359}
368 360
361static struct uprobe *get_uprobe(struct uprobe *uprobe)
362{
363 atomic_inc(&uprobe->ref);
364 return uprobe;
365}
366
367static void put_uprobe(struct uprobe *uprobe)
368{
369 if (atomic_dec_and_test(&uprobe->ref))
370 kfree(uprobe);
371}
372
369static int match_uprobe(struct uprobe *l, struct uprobe *r) 373static int match_uprobe(struct uprobe *l, struct uprobe *r)
370{ 374{
371 if (l->inode < r->inode) 375 if (l->inode < r->inode)
@@ -393,10 +397,8 @@ static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
393 while (n) { 397 while (n) {
394 uprobe = rb_entry(n, struct uprobe, rb_node); 398 uprobe = rb_entry(n, struct uprobe, rb_node);
395 match = match_uprobe(&u, uprobe); 399 match = match_uprobe(&u, uprobe);
396 if (!match) { 400 if (!match)
397 atomic_inc(&uprobe->ref); 401 return get_uprobe(uprobe);
398 return uprobe;
399 }
400 402
401 if (match < 0) 403 if (match < 0)
402 n = n->rb_left; 404 n = n->rb_left;
@@ -432,10 +434,8 @@ static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
432 parent = *p; 434 parent = *p;
433 u = rb_entry(parent, struct uprobe, rb_node); 435 u = rb_entry(parent, struct uprobe, rb_node);
434 match = match_uprobe(uprobe, u); 436 match = match_uprobe(uprobe, u);
435 if (!match) { 437 if (!match)
436 atomic_inc(&u->ref); 438 return get_uprobe(u);
437 return u;
438 }
439 439
440 if (match < 0) 440 if (match < 0)
441 p = &parent->rb_left; 441 p = &parent->rb_left;
@@ -472,12 +472,6 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
472 return u; 472 return u;
473} 473}
474 474
475static void put_uprobe(struct uprobe *uprobe)
476{
477 if (atomic_dec_and_test(&uprobe->ref))
478 kfree(uprobe);
479}
480
481static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) 475static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
482{ 476{
483 struct uprobe *uprobe, *cur_uprobe; 477 struct uprobe *uprobe, *cur_uprobe;
@@ -1039,14 +1033,14 @@ static void build_probe_list(struct inode *inode,
1039 if (u->inode != inode || u->offset < min) 1033 if (u->inode != inode || u->offset < min)
1040 break; 1034 break;
1041 list_add(&u->pending_list, head); 1035 list_add(&u->pending_list, head);
1042 atomic_inc(&u->ref); 1036 get_uprobe(u);
1043 } 1037 }
1044 for (t = n; (t = rb_next(t)); ) { 1038 for (t = n; (t = rb_next(t)); ) {
1045 u = rb_entry(t, struct uprobe, rb_node); 1039 u = rb_entry(t, struct uprobe, rb_node);
1046 if (u->inode != inode || u->offset > max) 1040 if (u->inode != inode || u->offset > max)
1047 break; 1041 break;
1048 list_add(&u->pending_list, head); 1042 list_add(&u->pending_list, head);
1049 atomic_inc(&u->ref); 1043 get_uprobe(u);
1050 } 1044 }
1051 } 1045 }
1052 spin_unlock(&uprobes_treelock); 1046 spin_unlock(&uprobes_treelock);
@@ -1132,11 +1126,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
1132/* Slot allocation for XOL */ 1126/* Slot allocation for XOL */
1133static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) 1127static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
1134{ 1128{
1135 int ret = -EALREADY; 1129 struct vm_area_struct *vma;
1130 int ret;
1136 1131
1137 down_write(&mm->mmap_sem); 1132 down_write(&mm->mmap_sem);
1138 if (mm->uprobes_state.xol_area) 1133 if (mm->uprobes_state.xol_area) {
1134 ret = -EALREADY;
1139 goto fail; 1135 goto fail;
1136 }
1140 1137
 	if (!area->vaddr) {
 		/* Try to map as high as possible, this is only a hint. */
@@ -1148,11 +1145,15 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 		}
 	}
 
-	ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
-				VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, &area->page);
-	if (ret)
+	vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
+				VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO,
+				&area->xol_mapping);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto fail;
+	}
 
+	ret = 0;
 	smp_wmb(); /* pairs with get_xol_area() */
 	mm->uprobes_state.xol_area = area;
 fail:
@@ -1175,21 +1176,24 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 	if (!area->bitmap)
 		goto free_area;
 
-	area->page = alloc_page(GFP_HIGHUSER);
-	if (!area->page)
+	area->xol_mapping.name = "[uprobes]";
+	area->xol_mapping.pages = area->pages;
+	area->pages[0] = alloc_page(GFP_HIGHUSER);
+	if (!area->pages[0])
 		goto free_bitmap;
+	area->pages[1] = NULL;
 
 	area->vaddr = vaddr;
 	init_waitqueue_head(&area->wq);
 	/* Reserve the 1st slot for get_trampoline_vaddr() */
 	set_bit(0, area->bitmap);
 	atomic_set(&area->slot_count, 1);
-	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
+	copy_to_page(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE);
 
 	if (!xol_add_vma(mm, area))
 		return area;
 
-	__free_page(area->page);
+	__free_page(area->pages[0]);
 free_bitmap:
 	kfree(area->bitmap);
 free_area:
@@ -1227,7 +1231,7 @@ void uprobe_clear_state(struct mm_struct *mm)
 	if (!area)
 		return;
 
-	put_page(area->page);
+	put_page(area->pages[0]);
 	kfree(area->bitmap);
 	kfree(area);
 }
@@ -1296,7 +1300,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 	if (unlikely(!xol_vaddr))
 		return 0;
 
-	arch_uprobe_copy_ixol(area->page, xol_vaddr,
+	arch_uprobe_copy_ixol(area->pages[0], xol_vaddr,
 			&uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
 
 	return xol_vaddr;
@@ -1333,6 +1337,7 @@ static void xol_free_insn_slot(struct task_struct *tsk)
 
 		clear_bit(slot_nr, area->bitmap);
 		atomic_dec(&area->slot_count);
+		smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
 		if (waitqueue_active(&area->wq))
 			wake_up(&area->wq);
 
@@ -1376,6 +1381,14 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
 	return instruction_pointer(regs);
 }
 
+static struct return_instance *free_ret_instance(struct return_instance *ri)
+{
+	struct return_instance *next = ri->next;
+	put_uprobe(ri->uprobe);
+	kfree(ri);
+	return next;
+}
+
 /*
  * Called with no locks held.
  * Called in context of a exiting or a exec-ing thread.
@@ -1383,7 +1396,7 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
 void uprobe_free_utask(struct task_struct *t)
 {
 	struct uprobe_task *utask = t->utask;
-	struct return_instance *ri, *tmp;
+	struct return_instance *ri;
 
 	if (!utask)
 		return;
@@ -1392,13 +1405,8 @@ void uprobe_free_utask(struct task_struct *t)
 		put_uprobe(utask->active_uprobe);
 
 	ri = utask->return_instances;
-	while (ri) {
-		tmp = ri;
-		ri = ri->next;
-
-		put_uprobe(tmp->uprobe);
-		kfree(tmp);
-	}
+	while (ri)
+		ri = free_ret_instance(ri);
 
 	xol_free_insn_slot(t);
 	kfree(utask);
@@ -1437,7 +1445,7 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
 		return -ENOMEM;
 
 	*n = *o;
-	atomic_inc(&n->uprobe->ref);
+	get_uprobe(n->uprobe);
 	n->next = NULL;
 
 	*p = n;
@@ -1515,12 +1523,25 @@ static unsigned long get_trampoline_vaddr(void)
 	return trampoline_vaddr;
 }
 
+static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
+					struct pt_regs *regs)
+{
+	struct return_instance *ri = utask->return_instances;
+	enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL;
+
+	while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) {
+		ri = free_ret_instance(ri);
+		utask->depth--;
+	}
+	utask->return_instances = ri;
+}
+
 static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
 {
 	struct return_instance *ri;
 	struct uprobe_task *utask;
 	unsigned long orig_ret_vaddr, trampoline_vaddr;
-	bool chained = false;
+	bool chained;
 
 	if (!get_xol_area())
 		return;
@@ -1536,49 +1557,47 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
 		return;
 	}
 
-	ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL);
+	ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
 	if (!ri)
-		goto fail;
+		return;
 
 	trampoline_vaddr = get_trampoline_vaddr();
 	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
 	if (orig_ret_vaddr == -1)
 		goto fail;
 
+	/* drop the entries invalidated by longjmp() */
+	chained = (orig_ret_vaddr == trampoline_vaddr);
+	cleanup_return_instances(utask, chained, regs);
+
 	/*
 	 * We don't want to keep trampoline address in stack, rather keep the
 	 * original return address of first caller thru all the consequent
 	 * instances. This also makes breakpoint unwrapping easier.
 	 */
-	if (orig_ret_vaddr == trampoline_vaddr) {
+	if (chained) {
 		if (!utask->return_instances) {
 			/*
 			 * This situation is not possible. Likely we have an
 			 * attack from user-space.
 			 */
-			pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n",
-				current->pid, current->tgid);
+			uprobe_warn(current, "handle tail call");
 			goto fail;
 		}
-
-		chained = true;
 		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
 	}
 
-	atomic_inc(&uprobe->ref);
-	ri->uprobe = uprobe;
+	ri->uprobe = get_uprobe(uprobe);
 	ri->func = instruction_pointer(regs);
+	ri->stack = user_stack_pointer(regs);
 	ri->orig_ret_vaddr = orig_ret_vaddr;
 	ri->chained = chained;
 
 	utask->depth++;
-
-	/* add instance to the stack */
 	ri->next = utask->return_instances;
 	utask->return_instances = ri;
 
 	return;
-
  fail:
 	kfree(ri);
 }
@@ -1766,46 +1785,58 @@ handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
 	up_read(&uprobe->register_rwsem);
 }
 
-static bool handle_trampoline(struct pt_regs *regs)
+static struct return_instance *find_next_ret_chain(struct return_instance *ri)
 {
-	struct uprobe_task *utask;
-	struct return_instance *ri, *tmp;
 	bool chained;
 
+	do {
+		chained = ri->chained;
+		ri = ri->next;	/* can't be NULL if chained */
+	} while (chained);
+
+	return ri;
+}
+
+static void handle_trampoline(struct pt_regs *regs)
+{
+	struct uprobe_task *utask;
+	struct return_instance *ri, *next;
+	bool valid;
+
 	utask = current->utask;
 	if (!utask)
-		return false;
+		goto sigill;
 
 	ri = utask->return_instances;
 	if (!ri)
-		return false;
-
-	/*
-	 * TODO: we should throw out return_instance's invalidated by
-	 * longjmp(), currently we assume that the probed function always
-	 * returns.
-	 */
-	instruction_pointer_set(regs, ri->orig_ret_vaddr);
-
-	for (;;) {
-		handle_uretprobe_chain(ri, regs);
-
-		chained = ri->chained;
-		put_uprobe(ri->uprobe);
-
-		tmp = ri;
-		ri = ri->next;
-		kfree(tmp);
-		utask->depth--;
+		goto sigill;
 
-		if (!chained)
-			break;
-		BUG_ON(!ri);
-	}
+	do {
+		/*
+		 * We should throw out the frames invalidated by longjmp().
+		 * If this chain is valid, then the next one should be alive
+		 * or NULL; the latter case means that nobody but ri->func
+		 * could hit this trampoline on return. TODO: sigaltstack().
+		 */
+		next = find_next_ret_chain(ri);
+		valid = !next || arch_uretprobe_is_alive(next, RP_CHECK_RET, regs);
+
+		instruction_pointer_set(regs, ri->orig_ret_vaddr);
+		do {
+			if (valid)
+				handle_uretprobe_chain(ri, regs);
+			ri = free_ret_instance(ri);
+			utask->depth--;
+		} while (ri != next);
+	} while (!valid);
 
 	utask->return_instances = ri;
+	return;
+
+ sigill:
+	uprobe_warn(current, "handle uretprobe, sending SIGILL.");
+	force_sig_info(SIGILL, SEND_SIG_FORCED, current);
 
-	return true;
 }
 
 bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
@@ -1813,6 +1844,12 @@ bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
 	return false;
 }
 
+bool __weak arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+				struct pt_regs *regs)
+{
+	return true;
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@@ -1824,13 +1861,8 @@ static void handle_swbp(struct pt_regs *regs)
 	int uninitialized_var(is_swbp);
 
 	bp_vaddr = uprobe_get_swbp_addr(regs);
-	if (bp_vaddr == get_trampoline_vaddr()) {
-		if (handle_trampoline(regs))
-			return;
-
-		pr_warn("uprobe: unable to handle uretprobe pid/tgid=%d/%d\n",
-			current->pid, current->tgid);
-	}
+	if (bp_vaddr == get_trampoline_vaddr())
+		return handle_trampoline(regs);
 
 	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 	if (!uprobe) {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c90e417bb963..d10ab6b9b5e0 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1332,7 +1332,7 @@ bool __weak arch_within_kprobe_blacklist(unsigned long addr)
 		addr < (unsigned long)__kprobes_text_end;
 }
 
-static bool within_kprobe_blacklist(unsigned long addr)
+bool within_kprobe_blacklist(unsigned long addr)
 {
 	struct kprobe_blacklist_entry *ent;
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 3b9a48ae153a..1153c43428f3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -434,7 +434,7 @@ config UPROBE_EVENT
 
 config BPF_EVENTS
 	depends on BPF_SYSCALL
-	depends on KPROBE_EVENT
+	depends on KPROBE_EVENT || UPROBE_EVENT
 	bool
 	default y
 	help
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index aa1ea7b36fa8..d2f6d0be3503 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -601,7 +601,22 @@ static int probes_seq_show(struct seq_file *m, void *v)
 
 	seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system,
 			trace_event_name(&tu->tp.call));
-	seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
+	seq_printf(m, " %s:", tu->filename);
+
+	/* Don't print "0x (null)" when offset is 0 */
+	if (tu->offset) {
+		seq_printf(m, "0x%p", (void *)tu->offset);
+	} else {
+		switch (sizeof(void *)) {
+		case 4:
+			seq_printf(m, "0x00000000");
+			break;
+		case 8:
+		default:
+			seq_printf(m, "0x0000000000000000");
+			break;
+		}
+	}
 
 	for (i = 0; i < tu->tp.nr_args; i++)
 		seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
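Note: this branch exists because printing a zero offset through "0x%p" does not produce zeros; the kernel's vsprintf renders a NULL pointer as "(null)", hence the comment above. A user-space analogue of the problem and of the fixed-width fallback (glibc prints "(nil)" rather than "(null)", but the effect is the same):

    #include <stdio.h>

    int main(void)
    {
            unsigned long offset = 0;

            printf("0x%p\n", (void *)offset);       /* glibc: "0x(nil)" */

            /* The fallback chosen above: width follows sizeof(void *). */
            if (sizeof(void *) == 4)
                    printf("0x00000000\n");
            else
                    printf("0x0000000000000000\n");
            return 0;
    }
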
@@ -1095,11 +1110,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
 {
 	struct trace_event_call *call = &tu->tp.call;
 	struct uprobe_trace_entry_head *entry;
+	struct bpf_prog *prog = call->prog;
 	struct hlist_head *head;
 	void *data;
 	int size, esize;
 	int rctx;
 
+	if (prog && !trace_call_bpf(prog, regs))
+		return;
+
 	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 
 	size = esize + tu->tp.size + dsize;
@@ -1289,6 +1308,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
 		return -ENODEV;
 	}
 
+	call->flags = TRACE_EVENT_FL_UPROBE;
 	call->class->reg = trace_uprobe_register;
 	call->data = tu;
 	ret = trace_add_event_call(call);
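Note: the __uprobe_perf_func() hunk above gives an attached BPF program veto power over every sample: when trace_call_bpf() returns 0, the event is dropped before any ring-buffer space is reserved. The same early-filter shape, reduced to a user-space sketch with a hypothetical callback standing in for the BPF program:

    #include <stdbool.h>
    #include <stdio.h>

    typedef bool (*sample_filter_t)(void *ctx);     /* stand-in for call->prog */

    static void emit_sample(sample_filter_t filter, void *ctx)
    {
            if (filter && !filter(ctx))
                    return;                 /* vetoed: nothing is allocated */

            printf("sample recorded\n");    /* the expensive path */
    }

    static bool drop_everything(void *ctx)
    {
            (void)ctx;
            return false;
    }

    int main(void)
    {
            emit_sample(NULL, NULL);                /* no filter: recorded */
            emit_sample(drop_everything, NULL);     /* filtered: dropped */
            return 0;
    }
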
diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt
index 00ad2d608727..aa5e092c4352 100644
--- a/tools/build/Documentation/Build.txt
+++ b/tools/build/Documentation/Build.txt
@@ -66,6 +66,7 @@ To follow the above example, the user provides following 'Build' files:
   ex/Build:
     ex-y += a.o
     ex-y += b.o
+    ex-y += b.o # duplicates in the lists are allowed
 
     libex-y += c.o
     libex-y += d.o
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index faca2bf6a430..0c5f485521d6 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -57,11 +57,13 @@ quiet_cmd_cc_i_c = CPP $@
 quiet_cmd_cc_s_c = AS       $@
       cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $<
 
+quiet_cmd_gen = GEN      $@
+
 # Link agregate command
 # If there's nothing to link, create empty $@ object.
 quiet_cmd_ld_multi = LD       $@
       cmd_ld_multi = $(if $(strip $(obj-y)),\
-                     $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@)
+                     $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
 
 # Build rules
 $(OUTPUT)%.o: %.c FORCE
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 463ed8f2a267..74ca42093d70 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -33,7 +33,8 @@ FILES= \
 	test-compile-32.bin		\
 	test-compile-x32.bin		\
 	test-zlib.bin			\
-	test-lzma.bin
+	test-lzma.bin			\
+	test-bpf.bin
 
 CC := $(CROSS_COMPILE)gcc -MD
 PKG_CONFIG := $(CROSS_COMPILE)pkg-config
@@ -69,8 +70,13 @@ test-libelf.bin:
 test-glibc.bin:
 	$(BUILD)
 
+DWARFLIBS := -ldw
+ifeq ($(findstring -static,${LDFLAGS}),-static)
+DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
+endif
+
 test-dwarf.bin:
-	$(BUILD) -ldw
+	$(BUILD) $(DWARFLIBS)
 
 test-libelf-mmap.bin:
 	$(BUILD) -lelf
@@ -156,6 +162,9 @@ test-zlib.bin:
 test-lzma.bin:
 	$(BUILD) -llzma
 
+test-bpf.bin:
+	$(BUILD)
+
 -include *.d
 
 ###############################
diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
new file mode 100644
index 000000000000..062bac811af9
--- /dev/null
+++ b/tools/build/feature/test-bpf.c
@@ -0,0 +1,18 @@
1#include <linux/bpf.h>
2
3int main(void)
4{
5 union bpf_attr attr;
6
7 attr.prog_type = BPF_PROG_TYPE_KPROBE;
8 attr.insn_cnt = 0;
9 attr.insns = 0;
10 attr.license = 0;
11 attr.log_buf = 0;
12 attr.log_size = 0;
13 attr.log_level = 0;
14 attr.kern_version = 0;
15
16 attr = attr;
17 return 0;
18}
diff --git a/tools/build/feature/test-glibc.c b/tools/build/feature/test-glibc.c
index b0820345cd98..9367f7586676 100644
--- a/tools/build/feature/test-glibc.c
+++ b/tools/build/feature/test-glibc.c
@@ -1,8 +1,19 @@
+#include <stdlib.h>
+
+#if !defined(__UCLIBC__)
 #include <gnu/libc-version.h>
+#else
+#define XSTR(s) STR(s)
+#define STR(s) #s
+#endif
 
 int main(void)
 {
+#if !defined(__UCLIBC__)
 	const char *version = gnu_get_libc_version();
+#else
+	const char *version = XSTR(__GLIBC__) "." XSTR(__GLIBC_MINOR__);
+#endif
 
 	return (long)version;
 }
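Note: the XSTR()/STR() pair added for uClibc is the standard two-level stringification idiom: STR() stringizes its argument as written, while the extra XSTR() level forces macro expansion first, so XSTR(__GLIBC__) becomes "2" rather than "__GLIBC__". A standalone illustration:

    #include <stdio.h>

    #define XSTR(s) STR(s)
    #define STR(s) #s
    #define VALUE 42

    int main(void)
    {
            printf("%s\n", STR(VALUE));     /* "VALUE": no expansion happens */
            printf("%s\n", XSTR(VALUE));    /* "42": expanded, then stringized */
            return 0;
    }
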
diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build
index 70d876237c57..429c7d452101 100644
--- a/tools/build/tests/ex/Build
+++ b/tools/build/tests/ex/Build
@@ -1,6 +1,7 @@
 ex-y += ex.o
 ex-y += a.o
 ex-y += b.o
+ex-y += b.o
 ex-y += empty/
 ex-y += empty2/
 
diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c
index 8305b3e9d48e..eb7cf4d18f8a 100644
--- a/tools/lib/api/fs/debugfs.c
+++ b/tools/lib/api/fs/debugfs.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 
 #include "debugfs.h"
+#include "tracefs.h"
 
 #ifndef DEBUGFS_DEFAULT_PATH
 #define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug"
@@ -94,11 +95,21 @@ int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename
94 "Hint:\tIs the debugfs filesystem mounted?\n" 95 "Hint:\tIs the debugfs filesystem mounted?\n"
95 "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); 96 "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'");
96 break; 97 break;
97 case EACCES: 98 case EACCES: {
99 const char *mountpoint = debugfs_mountpoint;
100
101 if (!access(debugfs_mountpoint, R_OK) && strncmp(filename, "tracing/", 8) == 0) {
102 const char *tracefs_mntpoint = tracefs_find_mountpoint();
103
104 if (tracefs_mntpoint)
105 mountpoint = tracefs_mntpoint;
106 }
107
98 snprintf(buf, size, 108 snprintf(buf, size,
99 "Error:\tNo permissions to read %s/%s\n" 109 "Error:\tNo permissions to read %s/%s\n"
100 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", 110 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
101 debugfs_mountpoint, filename, debugfs_mountpoint); 111 debugfs_mountpoint, filename, mountpoint);
112 }
102 break; 113 break;
103 default: 114 default:
104 snprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); 115 snprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf)));
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
new file mode 100644
index 000000000000..812aeedaea38
--- /dev/null
+++ b/tools/lib/bpf/.gitignore
@@ -0,0 +1,2 @@
1libbpf_version.h
2FEATURE-DUMP
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
new file mode 100644
index 000000000000..d8749756352d
--- /dev/null
+++ b/tools/lib/bpf/Build
@@ -0,0 +1 @@
libbpf-y := libbpf.o bpf.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
new file mode 100644
index 000000000000..f68d23a0b487
--- /dev/null
+++ b/tools/lib/bpf/Makefile
@@ -0,0 +1,195 @@
1# Most of this file is copied from tools/lib/traceevent/Makefile
2
3BPF_VERSION = 0
4BPF_PATCHLEVEL = 0
5BPF_EXTRAVERSION = 1
6
7MAKEFLAGS += --no-print-directory
8
9
10# Makefiles suck: This macro sets a default value of $(2) for the
11# variable named by $(1), unless the variable has been set by
12# environment or command line. This is necessary for CC and AR
13# because make sets default values, so the simpler ?= approach
14# won't work as expected.
15define allow-override
16 $(if $(or $(findstring environment,$(origin $(1))),\
17 $(findstring command line,$(origin $(1)))),,\
18 $(eval $(1) = $(2)))
19endef
20
21# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
22$(call allow-override,CC,$(CROSS_COMPILE)gcc)
23$(call allow-override,AR,$(CROSS_COMPILE)ar)
24
25INSTALL = install
26
27# Use DESTDIR for installing into a different root directory.
28# This is useful for building a package. The program will be
29# installed in this directory as if it was the root directory.
30# Then the build tool can move it later.
31DESTDIR ?=
32DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
33
34LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
35ifeq ($(LP64), 1)
36 libdir_relative = lib64
37else
38 libdir_relative = lib
39endif
40
41prefix ?= /usr/local
42libdir = $(prefix)/$(libdir_relative)
43man_dir = $(prefix)/share/man
44man_dir_SQ = '$(subst ','\'',$(man_dir))'
45
46export man_dir man_dir_SQ INSTALL
47export DESTDIR DESTDIR_SQ
48
49include ../../scripts/Makefile.include
50
51# copy a bit from Linux kbuild
52
53ifeq ("$(origin V)", "command line")
54 VERBOSE = $(V)
55endif
56ifndef VERBOSE
57 VERBOSE = 0
58endif
59
60ifeq ($(srctree),)
61srctree := $(patsubst %/,%,$(dir $(shell pwd)))
62srctree := $(patsubst %/,%,$(dir $(srctree)))
63srctree := $(patsubst %/,%,$(dir $(srctree)))
64#$(info Determined 'srctree' to be $(srctree))
65endif
66
67FEATURE_DISPLAY = libelf libelf-getphdrnum libelf-mmap bpf
68FEATURE_TESTS = libelf bpf
69
70INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi
71FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
72
73include $(srctree)/tools/build/Makefile.feature
74
75export prefix libdir src obj
76
77# Shell quotes
78libdir_SQ = $(subst ','\'',$(libdir))
79libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
80plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
81
82LIB_FILE = libbpf.a libbpf.so
83
84VERSION = $(BPF_VERSION)
85PATCHLEVEL = $(BPF_PATCHLEVEL)
86EXTRAVERSION = $(BPF_EXTRAVERSION)
87
88OBJ = $@
89N =
90
91LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
92
93# Set compile option CFLAGS
94ifdef EXTRA_CFLAGS
95 CFLAGS := $(EXTRA_CFLAGS)
96else
97 CFLAGS := -g -Wall
98endif
99
100ifeq ($(feature-libelf-mmap), 1)
101 override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
102endif
103
104ifeq ($(feature-libelf-getphdrnum), 1)
105 override CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
106endif
107
108# Append required CFLAGS
109override CFLAGS += $(EXTRA_WARNINGS)
110override CFLAGS += -Werror -Wall
111override CFLAGS += -fPIC
112override CFLAGS += $(INCLUDES)
113
114ifeq ($(VERBOSE),1)
115 Q =
116else
117 Q = @
118endif
119
120# Disable command line variables (CFLAGS) override from top
121# level Makefile (perf), otherwise build Makefile will get
122# the same command line setup.
123MAKEOVERRIDES=
124
125export srctree OUTPUT CC LD CFLAGS V
126build := -f $(srctree)/tools/build/Makefile.build dir=. obj
127
128BPF_IN := $(OUTPUT)libbpf-in.o
129LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
130
131CMD_TARGETS = $(LIB_FILE)
132
133TARGETS = $(CMD_TARGETS)
134
135all: $(VERSION_FILES) all_cmd
136
137all_cmd: $(CMD_TARGETS)
138
139$(BPF_IN): force elfdep bpfdep
140 $(Q)$(MAKE) $(build)=libbpf
141
142$(OUTPUT)libbpf.so: $(BPF_IN)
143 $(QUIET_LINK)$(CC) --shared $^ -o $@
144
145$(OUTPUT)libbpf.a: $(BPF_IN)
146 $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
147
148define update_dir
149 (echo $1 > $@.tmp; \
150 if [ -r $@ ] && cmp -s $@ $@.tmp; then \
151 rm -f $@.tmp; \
152 else \
153 echo ' UPDATE $@'; \
154 mv -f $@.tmp $@; \
155 fi);
156endef
157
158define do_install
159 if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
160 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
161 fi; \
162 $(INSTALL) $1 '$(DESTDIR_SQ)$2'
163endef
164
165install_lib: all_cmd
166 $(call QUIET_INSTALL, $(LIB_FILE)) \
167 $(call do_install,$(LIB_FILE),$(libdir_SQ))
168
169install: install_lib
170
171### Cleaning rules
172
173config-clean:
174 $(call QUIET_CLEAN, config)
175 $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
176
177clean:
178 $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \
179 $(RM) LIBBPF-CFLAGS
180 $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP
181
182
183
184PHONY += force elfdep bpfdep
185force:
186
187elfdep:
188 @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit -1 ; fi
189
190bpfdep:
191 @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit -1 ; fi
192
193# Declare the contents of the .PHONY variable as phony. We keep that
194# information in a variable so we can use it in if_changed and friends.
195.PHONY: $(PHONY)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
new file mode 100644
index 000000000000..a6331050ab79
--- /dev/null
+++ b/tools/lib/bpf/bpf.c
@@ -0,0 +1,85 @@
1/*
2 * common eBPF ELF operations.
3 *
4 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
5 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
6 * Copyright (C) 2015 Huawei Inc.
7 */
8
9#include <stdlib.h>
10#include <memory.h>
11#include <unistd.h>
12#include <asm/unistd.h>
13#include <linux/bpf.h>
14#include "bpf.h"
15
16/*
17 * When building perf, unistd.h is overridden, so __NR_bpf
18 * needs to be defined explicitly.
19 */
20#ifndef __NR_bpf
21# if defined(__i386__)
22# define __NR_bpf 357
23# elif defined(__x86_64__)
24# define __NR_bpf 321
25# elif defined(__aarch64__)
26# define __NR_bpf 280
27# else
28# error __NR_bpf not defined. libbpf does not support your arch.
29# endif
30#endif
31
32static __u64 ptr_to_u64(void *ptr)
33{
34 return (__u64) (unsigned long) ptr;
35}
36
37static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
38 unsigned int size)
39{
40 return syscall(__NR_bpf, cmd, attr, size);
41}
42
43int bpf_create_map(enum bpf_map_type map_type, int key_size,
44 int value_size, int max_entries)
45{
46 union bpf_attr attr;
47
48 memset(&attr, '\0', sizeof(attr));
49
50 attr.map_type = map_type;
51 attr.key_size = key_size;
52 attr.value_size = value_size;
53 attr.max_entries = max_entries;
54
55 return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
56}
57
58int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
59 size_t insns_cnt, char *license,
60 u32 kern_version, char *log_buf, size_t log_buf_sz)
61{
62 int fd;
63 union bpf_attr attr;
64
65 bzero(&attr, sizeof(attr));
66 attr.prog_type = type;
67 attr.insn_cnt = (__u32)insns_cnt;
68 attr.insns = ptr_to_u64(insns);
69 attr.license = ptr_to_u64(license);
70 attr.log_buf = ptr_to_u64(NULL);
71 attr.log_size = 0;
72 attr.log_level = 0;
73 attr.kern_version = kern_version;
74
75 fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
76 if (fd >= 0 || !log_buf || !log_buf_sz)
77 return fd;
78
79 /* Try again with log */
80 attr.log_buf = ptr_to_u64(log_buf);
81 attr.log_size = log_buf_sz;
82 attr.log_level = 1;
83 log_buf[0] = 0;
84 return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
85}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
new file mode 100644
index 000000000000..854b7361b784
--- /dev/null
+++ b/tools/lib/bpf/bpf.h
@@ -0,0 +1,23 @@
1/*
2 * common eBPF ELF operations.
3 *
4 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
5 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
6 * Copyright (C) 2015 Huawei Inc.
7 */
8#ifndef __BPF_BPF_H
9#define __BPF_BPF_H
10
11#include <linux/bpf.h>
12
13int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
14 int max_entries);
15
16/* Recommended log buffer size */
17#define BPF_LOG_BUF_SIZE 65536
18int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
19 size_t insns_cnt, char *license,
20 u32 kern_version, char *log_buf,
21 size_t log_buf_sz);
22
23#endif
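Note: bpf.h and bpf.c together give tools a thin, dependency-free wrapper over the bpf(2) syscall. A hypothetical caller might look like the sketch below (it needs a BPF-enabled kernel and root/CAP_SYS_ADMIN; the two instructions are encoded by hand because the insn-building macros live in kernel-internal headers, and for BPF_PROG_TYPE_KPROBE the kern_version argument must match the running kernel):

    #include <linux/bpf.h>
    #include <linux/version.h>
    #include <stdio.h>
    #include "bpf.h"

    int main(void)
    {
            /* "mov r0, 0; exit" - the smallest program the verifier accepts */
            struct bpf_insn insns[] = {
                    { .code = 0xb7, .dst_reg = 0, .imm = 0 }, /* BPF_MOV64_IMM(R0, 0) */
                    { .code = 0x95 },                         /* BPF_EXIT_INSN() */
            };
            char license[] = "GPL";
            char log[BPF_LOG_BUF_SIZE];
            int map_fd, prog_fd;

            map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
                                    sizeof(long), 4);
            if (map_fd < 0)
                    perror("bpf_create_map");

            prog_fd = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, 2, license,
                                       LINUX_VERSION_CODE, log, sizeof(log));
            if (prog_fd < 0)
                    fprintf(stderr, "load failed, verifier log:\n%s\n", log);

            return prog_fd < 0;
    }
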
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
new file mode 100644
index 000000000000..4fa4bc4505f5
--- /dev/null
+++ b/tools/lib/bpf/libbpf.c
@@ -0,0 +1,1037 @@
1/*
2 * Common eBPF ELF object loading operations.
3 *
4 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
5 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
6 * Copyright (C) 2015 Huawei Inc.
7 */
8
9#include <stdlib.h>
10#include <stdio.h>
11#include <stdarg.h>
12#include <inttypes.h>
13#include <string.h>
14#include <unistd.h>
15#include <fcntl.h>
16#include <errno.h>
17#include <asm/unistd.h>
18#include <linux/kernel.h>
19#include <linux/bpf.h>
20#include <linux/list.h>
21#include <libelf.h>
22#include <gelf.h>
23
24#include "libbpf.h"
25#include "bpf.h"
26
27#define __printf(a, b) __attribute__((format(printf, a, b)))
28
29__printf(1, 2)
30static int __base_pr(const char *format, ...)
31{
32 va_list args;
33 int err;
34
35 va_start(args, format);
36 err = vfprintf(stderr, format, args);
37 va_end(args);
38 return err;
39}
40
41static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr;
42static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr;
43static __printf(1, 2) libbpf_print_fn_t __pr_debug;
44
45#define __pr(func, fmt, ...) \
46do { \
47 if ((func)) \
48 (func)("libbpf: " fmt, ##__VA_ARGS__); \
49} while (0)
50
51#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__)
52#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__)
53#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__)
54
55void libbpf_set_print(libbpf_print_fn_t warn,
56 libbpf_print_fn_t info,
57 libbpf_print_fn_t debug)
58{
59 __pr_warning = warn;
60 __pr_info = info;
61 __pr_debug = debug;
62}
63
64/* Copied from tools/perf/util/util.h */
65#ifndef zfree
66# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
67#endif
68
69#ifndef zclose
70# define zclose(fd) ({ \
71 int ___err = 0; \
72 if ((fd) >= 0) \
73 ___err = close((fd)); \
74 fd = -1; \
75 ___err; })
76#endif
77
78#ifdef HAVE_LIBELF_MMAP_SUPPORT
79# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
80#else
81# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
82#endif
83
84/*
85 * bpf_prog should be a better name but it has been used in
86 * linux/filter.h.
87 */
88struct bpf_program {
89 /* Index in elf obj file, for relocation use. */
90 int idx;
91 char *section_name;
92 struct bpf_insn *insns;
93 size_t insns_cnt;
94
95 struct {
96 int insn_idx;
97 int map_idx;
98 } *reloc_desc;
99 int nr_reloc;
100
101 int fd;
102
103 struct bpf_object *obj;
104 void *priv;
105 bpf_program_clear_priv_t clear_priv;
106};
107
108static LIST_HEAD(bpf_objects_list);
109
110struct bpf_object {
111 char license[64];
112 u32 kern_version;
113 void *maps_buf;
114 size_t maps_buf_sz;
115
116 struct bpf_program *programs;
117 size_t nr_programs;
118 int *map_fds;
119 /*
120 * This field is required because maps_buf will be freed and
121 * maps_buf_sz will be set to 0 after the object is loaded.
122 */
123 size_t nr_map_fds;
124 bool loaded;
125
126 /*
127 * Information when doing elf related work. Only valid if fd
128 * is valid.
129 */
130 struct {
131 int fd;
132 void *obj_buf;
133 size_t obj_buf_sz;
134 Elf *elf;
135 GElf_Ehdr ehdr;
136 Elf_Data *symbols;
137 struct {
138 GElf_Shdr shdr;
139 Elf_Data *data;
140 } *reloc;
141 int nr_reloc;
142 } efile;
143 /*
144 * All loaded bpf_object is linked in a list, which is
145 * hidden to caller. bpf_objects__<func> handlers deal with
146 * all objects.
147 */
148 struct list_head list;
149 char path[];
150};
151#define obj_elf_valid(o) ((o)->efile.elf)
152
153static void bpf_program__unload(struct bpf_program *prog)
154{
155 if (!prog)
156 return;
157
158 zclose(prog->fd);
159}
160
161static void bpf_program__exit(struct bpf_program *prog)
162{
163 if (!prog)
164 return;
165
166 if (prog->clear_priv)
167 prog->clear_priv(prog, prog->priv);
168
169 prog->priv = NULL;
170 prog->clear_priv = NULL;
171
172 bpf_program__unload(prog);
173 zfree(&prog->section_name);
174 zfree(&prog->insns);
175 zfree(&prog->reloc_desc);
176
177 prog->nr_reloc = 0;
178 prog->insns_cnt = 0;
179 prog->idx = -1;
180}
181
182static int
183bpf_program__init(void *data, size_t size, char *name, int idx,
184 struct bpf_program *prog)
185{
186 if (size < sizeof(struct bpf_insn)) {
187 pr_warning("corrupted section '%s'\n", name);
188 return -EINVAL;
189 }
190
191 bzero(prog, sizeof(*prog));
192
193 prog->section_name = strdup(name);
194 if (!prog->section_name) {
195 pr_warning("failed to alloc name for prog %s\n",
196 name);
197 goto errout;
198 }
199
200 prog->insns = malloc(size);
201 if (!prog->insns) {
202 pr_warning("failed to alloc insns for %s\n", name);
203 goto errout;
204 }
205 prog->insns_cnt = size / sizeof(struct bpf_insn);
206 memcpy(prog->insns, data,
207 prog->insns_cnt * sizeof(struct bpf_insn));
208 prog->idx = idx;
209 prog->fd = -1;
210
211 return 0;
212errout:
213 bpf_program__exit(prog);
214 return -ENOMEM;
215}
216
217static int
218bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
219 char *name, int idx)
220{
221 struct bpf_program prog, *progs;
222 int nr_progs, err;
223
224 err = bpf_program__init(data, size, name, idx, &prog);
225 if (err)
226 return err;
227
228 progs = obj->programs;
229 nr_progs = obj->nr_programs;
230
231 progs = realloc(progs, sizeof(progs[0]) * (nr_progs + 1));
232 if (!progs) {
233 /*
234 * In this case the original obj->programs
235 * is still valid, so don't need special treat for
236 * bpf_close_object().
237 */
238 pr_warning("failed to alloc a new program '%s'\n",
239 name);
240 bpf_program__exit(&prog);
241 return -ENOMEM;
242 }
243
244 pr_debug("found program %s\n", prog.section_name);
245 obj->programs = progs;
246 obj->nr_programs = nr_progs + 1;
247 prog.obj = obj;
248 progs[nr_progs] = prog;
249 return 0;
250}
251
252static struct bpf_object *bpf_object__new(const char *path,
253 void *obj_buf,
254 size_t obj_buf_sz)
255{
256 struct bpf_object *obj;
257
258 obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
259 if (!obj) {
260 pr_warning("alloc memory failed for %s\n", path);
261 return NULL;
262 }
263
264 strcpy(obj->path, path);
265 obj->efile.fd = -1;
266
267 /*
268 * The caller of this function should also call
269 * bpf_object__elf_finish() after data collection to return
270 * obj_buf to the user. If not, we should duplicate the buffer to
271 * avoid the user freeing it before the ELF work is finished.
272 */
273 obj->efile.obj_buf = obj_buf;
274 obj->efile.obj_buf_sz = obj_buf_sz;
275
276 obj->loaded = false;
277
278 INIT_LIST_HEAD(&obj->list);
279 list_add(&obj->list, &bpf_objects_list);
280 return obj;
281}
282
283static void bpf_object__elf_finish(struct bpf_object *obj)
284{
285 if (!obj_elf_valid(obj))
286 return;
287
288 if (obj->efile.elf) {
289 elf_end(obj->efile.elf);
290 obj->efile.elf = NULL;
291 }
292 obj->efile.symbols = NULL;
293
294 zfree(&obj->efile.reloc);
295 obj->efile.nr_reloc = 0;
296 zclose(obj->efile.fd);
297 obj->efile.obj_buf = NULL;
298 obj->efile.obj_buf_sz = 0;
299}
300
301static int bpf_object__elf_init(struct bpf_object *obj)
302{
303 int err = 0;
304 GElf_Ehdr *ep;
305
306 if (obj_elf_valid(obj)) {
307 pr_warning("elf init: internal error\n");
308 return -EEXIST;
309 }
310
311 if (obj->efile.obj_buf_sz > 0) {
312 /*
313 * obj_buf should have been validated by
314 * bpf_object__open_buffer().
315 */
316 obj->efile.elf = elf_memory(obj->efile.obj_buf,
317 obj->efile.obj_buf_sz);
318 } else {
319 obj->efile.fd = open(obj->path, O_RDONLY);
320 if (obj->efile.fd < 0) {
321 pr_warning("failed to open %s: %s\n", obj->path,
322 strerror(errno));
323 return -errno;
324 }
325
326 obj->efile.elf = elf_begin(obj->efile.fd,
327 LIBBPF_ELF_C_READ_MMAP,
328 NULL);
329 }
330
331 if (!obj->efile.elf) {
332 pr_warning("failed to open %s as ELF file\n",
333 obj->path);
334 err = -EINVAL;
335 goto errout;
336 }
337
338 if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
339 pr_warning("failed to get EHDR from %s\n",
340 obj->path);
341 err = -EINVAL;
342 goto errout;
343 }
344 ep = &obj->efile.ehdr;
345
346 if ((ep->e_type != ET_REL) || (ep->e_machine != 0)) {
347 pr_warning("%s is not an eBPF object file\n",
348 obj->path);
349 err = -EINVAL;
350 goto errout;
351 }
352
353 return 0;
354errout:
355 bpf_object__elf_finish(obj);
356 return err;
357}
358
359static int
360bpf_object__check_endianness(struct bpf_object *obj)
361{
362 static unsigned int const endian = 1;
363
364 switch (obj->efile.ehdr.e_ident[EI_DATA]) {
365 case ELFDATA2LSB:
366 /* We are big endian, BPF obj is little endian. */
367 if (*(unsigned char const *)&endian != 1)
368 goto mismatch;
369 break;
370
371 case ELFDATA2MSB:
372 /* We are little endian, BPF obj is big endian. */
373 if (*(unsigned char const *)&endian != 0)
374 goto mismatch;
375 break;
376 default:
377 return -EINVAL;
378 }
379
380 return 0;
381
382mismatch:
383 pr_warning("Error: endianness mismatch.\n");
384 return -EINVAL;
385}
386
387static int
388bpf_object__init_license(struct bpf_object *obj,
389 void *data, size_t size)
390{
391 memcpy(obj->license, data,
392 min(size, sizeof(obj->license) - 1));
393 pr_debug("license of %s is %s\n", obj->path, obj->license);
394 return 0;
395}
396
397static int
398bpf_object__init_kversion(struct bpf_object *obj,
399 void *data, size_t size)
400{
401 u32 kver;
402
403 if (size != sizeof(kver)) {
404 pr_warning("invalid kver section in %s\n", obj->path);
405 return -EINVAL;
406 }
407 memcpy(&kver, data, sizeof(kver));
408 obj->kern_version = kver;
409 pr_debug("kernel version of %s is %x\n", obj->path,
410 obj->kern_version);
411 return 0;
412}
413
414static int
415bpf_object__init_maps(struct bpf_object *obj, void *data,
416 size_t size)
417{
418 if (size == 0) {
419 pr_debug("%s doesn't need map definition\n",
420 obj->path);
421 return 0;
422 }
423
424 obj->maps_buf = malloc(size);
425 if (!obj->maps_buf) {
426 pr_warning("malloc maps failed: %s\n", obj->path);
427 return -ENOMEM;
428 }
429
430 obj->maps_buf_sz = size;
431 memcpy(obj->maps_buf, data, size);
432 pr_debug("maps in %s: %ld bytes\n", obj->path, (long)size);
433 return 0;
434}
435
436static int bpf_object__elf_collect(struct bpf_object *obj)
437{
438 Elf *elf = obj->efile.elf;
439 GElf_Ehdr *ep = &obj->efile.ehdr;
440 Elf_Scn *scn = NULL;
441 int idx = 0, err = 0;
442
443 /* Elf is corrupted/truncated, avoid calling elf_strptr. */
444 if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
445 pr_warning("failed to get e_shstrndx from %s\n",
446 obj->path);
447 return -EINVAL;
448 }
449
450 while ((scn = elf_nextscn(elf, scn)) != NULL) {
451 char *name;
452 GElf_Shdr sh;
453 Elf_Data *data;
454
455 idx++;
456 if (gelf_getshdr(scn, &sh) != &sh) {
457 pr_warning("failed to get section header from %s\n",
458 obj->path);
459 err = -EINVAL;
460 goto out;
461 }
462
463 name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
464 if (!name) {
465 pr_warning("failed to get section name from %s\n",
466 obj->path);
467 err = -EINVAL;
468 goto out;
469 }
470
471 data = elf_getdata(scn, 0);
472 if (!data) {
473 pr_warning("failed to get section data from %s(%s)\n",
474 name, obj->path);
475 err = -EINVAL;
476 goto out;
477 }
478 pr_debug("section %s, size %ld, link %d, flags %lx, type=%d\n",
479 name, (unsigned long)data->d_size,
480 (int)sh.sh_link, (unsigned long)sh.sh_flags,
481 (int)sh.sh_type);
482
483 if (strcmp(name, "license") == 0)
484 err = bpf_object__init_license(obj,
485 data->d_buf,
486 data->d_size);
487 else if (strcmp(name, "version") == 0)
488 err = bpf_object__init_kversion(obj,
489 data->d_buf,
490 data->d_size);
491 else if (strcmp(name, "maps") == 0)
492 err = bpf_object__init_maps(obj, data->d_buf,
493 data->d_size);
494 else if (sh.sh_type == SHT_SYMTAB) {
495 if (obj->efile.symbols) {
496 pr_warning("bpf: multiple SYMTAB in %s\n",
497 obj->path);
498 err = -EEXIST;
499 } else
500 obj->efile.symbols = data;
501 } else if ((sh.sh_type == SHT_PROGBITS) &&
502 (sh.sh_flags & SHF_EXECINSTR) &&
503 (data->d_size > 0)) {
504 err = bpf_object__add_program(obj, data->d_buf,
505 data->d_size, name, idx);
506 if (err) {
507 char errmsg[128];
508 strerror_r(-err, errmsg, sizeof(errmsg));
509 pr_warning("failed to alloc program %s (%s): %s",
510 name, obj->path, errmsg);
511 }
512 } else if (sh.sh_type == SHT_REL) {
513 void *reloc = obj->efile.reloc;
514 int nr_reloc = obj->efile.nr_reloc + 1;
515
516 reloc = realloc(reloc,
517 sizeof(*obj->efile.reloc) * nr_reloc);
518 if (!reloc) {
519 pr_warning("realloc failed\n");
520 err = -ENOMEM;
521 } else {
522 int n = nr_reloc - 1;
523
524 obj->efile.reloc = reloc;
525 obj->efile.nr_reloc = nr_reloc;
526
527 obj->efile.reloc[n].shdr = sh;
528 obj->efile.reloc[n].data = data;
529 }
530 }
531 if (err)
532 goto out;
533 }
534out:
535 return err;
536}
537
538static struct bpf_program *
539bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
540{
541 struct bpf_program *prog;
542 size_t i;
543
544 for (i = 0; i < obj->nr_programs; i++) {
545 prog = &obj->programs[i];
546 if (prog->idx == idx)
547 return prog;
548 }
549 return NULL;
550}
551
552static int
553bpf_program__collect_reloc(struct bpf_program *prog,
554 size_t nr_maps, GElf_Shdr *shdr,
555 Elf_Data *data, Elf_Data *symbols)
556{
557 int i, nrels;
558
559 pr_debug("collecting relocating info for: '%s'\n",
560 prog->section_name);
561 nrels = shdr->sh_size / shdr->sh_entsize;
562
563 prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
564 if (!prog->reloc_desc) {
565 pr_warning("failed to alloc memory in relocation\n");
566 return -ENOMEM;
567 }
568 prog->nr_reloc = nrels;
569
570 for (i = 0; i < nrels; i++) {
571 GElf_Sym sym;
572 GElf_Rel rel;
573 unsigned int insn_idx;
574 struct bpf_insn *insns = prog->insns;
575 size_t map_idx;
576
577 if (!gelf_getrel(data, i, &rel)) {
578 pr_warning("relocation: failed to get %d reloc\n", i);
579 return -EINVAL;
580 }
581
582 insn_idx = rel.r_offset / sizeof(struct bpf_insn);
583 pr_debug("relocation: insn_idx=%u\n", insn_idx);
584
585 if (!gelf_getsym(symbols,
586 GELF_R_SYM(rel.r_info),
587 &sym)) {
588 pr_warning("relocation: symbol %"PRIx64" not found\n",
589 GELF_R_SYM(rel.r_info));
590 return -EINVAL;
591 }
592
593 if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
594 pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
595 insn_idx, insns[insn_idx].code);
596 return -EINVAL;
597 }
598
599 map_idx = sym.st_value / sizeof(struct bpf_map_def);
600 if (map_idx >= nr_maps) {
601 pr_warning("bpf relocation: map_idx %d large than %d\n",
602 (int)map_idx, (int)nr_maps - 1);
603 return -EINVAL;
604 }
605
606 prog->reloc_desc[i].insn_idx = insn_idx;
607 prog->reloc_desc[i].map_idx = map_idx;
608 }
609 return 0;
610}
611
612static int
613bpf_object__create_maps(struct bpf_object *obj)
614{
615 unsigned int i;
616 size_t nr_maps;
617 int *pfd;
618
619 nr_maps = obj->maps_buf_sz / sizeof(struct bpf_map_def);
620 if (!obj->maps_buf || !nr_maps) {
621 pr_debug("don't need create maps for %s\n",
622 obj->path);
623 return 0;
624 }
625
626 obj->map_fds = malloc(sizeof(int) * nr_maps);
627 if (!obj->map_fds) {
628 pr_warning("realloc perf_bpf_map_fds failed\n");
629 return -ENOMEM;
630 }
631 obj->nr_map_fds = nr_maps;
632
633 /* fill all fd with -1 */
634 memset(obj->map_fds, -1, sizeof(int) * nr_maps);
635
636 pfd = obj->map_fds;
637 for (i = 0; i < nr_maps; i++) {
638 struct bpf_map_def def;
639
640 def = *(struct bpf_map_def *)(obj->maps_buf +
641 i * sizeof(struct bpf_map_def));
642
643 *pfd = bpf_create_map(def.type,
644 def.key_size,
645 def.value_size,
646 def.max_entries);
647 if (*pfd < 0) {
648 size_t j;
649 int err = *pfd;
650
651 pr_warning("failed to create map: %s\n",
652 strerror(errno));
653 for (j = 0; j < i; j++)
654 zclose(obj->map_fds[j]);
655 obj->nr_map_fds = 0;
656 zfree(&obj->map_fds);
657 return err;
658 }
659 pr_debug("create map: fd=%d\n", *pfd);
660 pfd++;
661 }
662
663 zfree(&obj->maps_buf);
664 obj->maps_buf_sz = 0;
665 return 0;
666}
667
668static int
669bpf_program__relocate(struct bpf_program *prog, int *map_fds)
670{
671 int i;
672
673 if (!prog || !prog->reloc_desc)
674 return 0;
675
676 for (i = 0; i < prog->nr_reloc; i++) {
677 int insn_idx, map_idx;
678 struct bpf_insn *insns = prog->insns;
679
680 insn_idx = prog->reloc_desc[i].insn_idx;
681 map_idx = prog->reloc_desc[i].map_idx;
682
683 if (insn_idx >= (int)prog->insns_cnt) {
684 pr_warning("relocation out of range: '%s'\n",
685 prog->section_name);
686 return -ERANGE;
687 }
688 insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
689 insns[insn_idx].imm = map_fds[map_idx];
690 }
691
692 zfree(&prog->reloc_desc);
693 prog->nr_reloc = 0;
694 return 0;
695}
696
697
698static int
699bpf_object__relocate(struct bpf_object *obj)
700{
701 struct bpf_program *prog;
702 size_t i;
703 int err;
704
705 for (i = 0; i < obj->nr_programs; i++) {
706 prog = &obj->programs[i];
707
708 err = bpf_program__relocate(prog, obj->map_fds);
709 if (err) {
710 pr_warning("failed to relocate '%s'\n",
711 prog->section_name);
712 return err;
713 }
714 }
715 return 0;
716}
717
718static int bpf_object__collect_reloc(struct bpf_object *obj)
719{
720 int i, err;
721
722 if (!obj_elf_valid(obj)) {
723 pr_warning("Internal error: elf object is closed\n");
724 return -EINVAL;
725 }
726
727 for (i = 0; i < obj->efile.nr_reloc; i++) {
728 GElf_Shdr *shdr = &obj->efile.reloc[i].shdr;
729 Elf_Data *data = obj->efile.reloc[i].data;
730 int idx = shdr->sh_info;
731 struct bpf_program *prog;
732 size_t nr_maps = obj->maps_buf_sz /
733 sizeof(struct bpf_map_def);
734
735 if (shdr->sh_type != SHT_REL) {
736 pr_warning("internal error at %d\n", __LINE__);
737 return -EINVAL;
738 }
739
740 prog = bpf_object__find_prog_by_idx(obj, idx);
741 if (!prog) {
742 pr_warning("relocation failed: no %d section\n",
743 idx);
744 return -ENOENT;
745 }
746
747 err = bpf_program__collect_reloc(prog, nr_maps,
748 shdr, data,
749 obj->efile.symbols);
750 if (err)
751 return -EINVAL;
752 }
753 return 0;
754}
755
756static int
757load_program(struct bpf_insn *insns, int insns_cnt,
758 char *license, u32 kern_version, int *pfd)
759{
760 int ret;
761 char *log_buf;
762
763 if (!insns || !insns_cnt)
764 return -EINVAL;
765
766 log_buf = malloc(BPF_LOG_BUF_SIZE);
767 if (!log_buf)
768 pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
769
770 ret = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
771 insns_cnt, license, kern_version,
772 log_buf, BPF_LOG_BUF_SIZE);
773
774 if (ret >= 0) {
775 *pfd = ret;
776 ret = 0;
777 goto out;
778 }
779
780 ret = -EINVAL;
781 pr_warning("load bpf program failed: %s\n", strerror(errno));
782
783 if (log_buf) {
784 pr_warning("-- BEGIN DUMP LOG ---\n");
785 pr_warning("\n%s\n", log_buf);
786 pr_warning("-- END LOG --\n");
787 }
788
789out:
790 free(log_buf);
791 return ret;
792}
793
794static int
795bpf_program__load(struct bpf_program *prog,
796 char *license, u32 kern_version)
797{
798 int err, fd;
799
800 err = load_program(prog->insns, prog->insns_cnt,
801 license, kern_version, &fd);
802 if (!err)
803 prog->fd = fd;
804
805 if (err)
806 pr_warning("failed to load program '%s'\n",
807 prog->section_name);
808 zfree(&prog->insns);
809 prog->insns_cnt = 0;
810 return err;
811}
812
813static int
814bpf_object__load_progs(struct bpf_object *obj)
815{
816 size_t i;
817 int err;
818
819 for (i = 0; i < obj->nr_programs; i++) {
820 err = bpf_program__load(&obj->programs[i],
821 obj->license,
822 obj->kern_version);
823 if (err)
824 return err;
825 }
826 return 0;
827}
828
829static int bpf_object__validate(struct bpf_object *obj)
830{
831 if (obj->kern_version == 0) {
832 pr_warning("%s doesn't provide kernel version\n",
833 obj->path);
834 return -EINVAL;
835 }
836 return 0;
837}
838
839static struct bpf_object *
840__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
841{
842 struct bpf_object *obj;
843
844 if (elf_version(EV_CURRENT) == EV_NONE) {
845 pr_warning("failed to init libelf for %s\n", path);
846 return NULL;
847 }
848
849 obj = bpf_object__new(path, obj_buf, obj_buf_sz);
850 if (!obj)
851 return NULL;
852
853 if (bpf_object__elf_init(obj))
854 goto out;
855 if (bpf_object__check_endianness(obj))
856 goto out;
857 if (bpf_object__elf_collect(obj))
858 goto out;
859 if (bpf_object__collect_reloc(obj))
860 goto out;
861 if (bpf_object__validate(obj))
862 goto out;
863
864 bpf_object__elf_finish(obj);
865 return obj;
866out:
867 bpf_object__close(obj);
868 return NULL;
869}
870
871struct bpf_object *bpf_object__open(const char *path)
872{
873 /* param validation */
874 if (!path)
875 return NULL;
876
877 pr_debug("loading %s\n", path);
878
879 return __bpf_object__open(path, NULL, 0);
880}
881
882struct bpf_object *bpf_object__open_buffer(void *obj_buf,
883 size_t obj_buf_sz)
884{
885 /* param validation */
886 if (!obj_buf || obj_buf_sz <= 0)
887 return NULL;
888
889 pr_debug("loading object from buffer\n");
890
891 return __bpf_object__open("[buffer]", obj_buf, obj_buf_sz);
892}
893
894int bpf_object__unload(struct bpf_object *obj)
895{
896 size_t i;
897
898 if (!obj)
899 return -EINVAL;
900
901 for (i = 0; i < obj->nr_map_fds; i++)
902 zclose(obj->map_fds[i]);
903 zfree(&obj->map_fds);
904 obj->nr_map_fds = 0;
905
906 for (i = 0; i < obj->nr_programs; i++)
907 bpf_program__unload(&obj->programs[i]);
908
909 return 0;
910}
911
912int bpf_object__load(struct bpf_object *obj)
913{
914 if (!obj)
915 return -EINVAL;
916
917 if (obj->loaded) {
918 pr_warning("object should not be loaded twice\n");
919 return -EINVAL;
920 }
921
922 obj->loaded = true;
923 if (bpf_object__create_maps(obj))
924 goto out;
925 if (bpf_object__relocate(obj))
926 goto out;
927 if (bpf_object__load_progs(obj))
928 goto out;
929
930 return 0;
931out:
932 bpf_object__unload(obj);
933 pr_warning("failed to load object '%s'\n", obj->path);
934 return -EINVAL;
935}
936
937void bpf_object__close(struct bpf_object *obj)
938{
939 size_t i;
940
941 if (!obj)
942 return;
943
944 bpf_object__elf_finish(obj);
945 bpf_object__unload(obj);
946
947 zfree(&obj->maps_buf);
948
949 if (obj->programs && obj->nr_programs) {
950 for (i = 0; i < obj->nr_programs; i++)
951 bpf_program__exit(&obj->programs[i]);
952 }
953 zfree(&obj->programs);
954
955 list_del(&obj->list);
956 free(obj);
957}
958
959struct bpf_object *
960bpf_object__next(struct bpf_object *prev)
961{
962 struct bpf_object *next;
963
964 if (!prev)
965 next = list_first_entry(&bpf_objects_list,
966 struct bpf_object,
967 list);
968 else
969 next = list_next_entry(prev, list);
970
971 /* Empty list is noticed here so don't need checking on entry. */
972 if (&next->list == &bpf_objects_list)
973 return NULL;
974
975 return next;
976}
977
978struct bpf_program *
979bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
980{
981 size_t idx;
982
983 if (!obj->programs)
984 return NULL;
985 /* First handler */
986 if (prev == NULL)
987 return &obj->programs[0];
988
989 if (prev->obj != obj) {
990 pr_warning("error: program handler doesn't match object\n");
991 return NULL;
992 }
993
994 idx = (prev - obj->programs) + 1;
995 if (idx >= obj->nr_programs)
996 return NULL;
997 return &obj->programs[idx];
998}
999
1000int bpf_program__set_private(struct bpf_program *prog,
1001 void *priv,
1002 bpf_program_clear_priv_t clear_priv)
1003{
1004 if (prog->priv && prog->clear_priv)
1005 prog->clear_priv(prog, prog->priv);
1006
1007 prog->priv = priv;
1008 prog->clear_priv = clear_priv;
1009 return 0;
1010}
1011
1012int bpf_program__get_private(struct bpf_program *prog, void **ppriv)
1013{
1014 *ppriv = prog->priv;
1015 return 0;
1016}
1017
1018const char *bpf_program__title(struct bpf_program *prog, bool dup)
1019{
1020 const char *title;
1021
1022 title = prog->section_name;
1023 if (dup) {
1024 title = strdup(title);
1025 if (!title) {
1026 pr_warning("failed to strdup program title\n");
1027 return NULL;
1028 }
1029 }
1030
1031 return title;
1032}
1033
1034int bpf_program__fd(struct bpf_program *prog)
1035{
1036 return prog->fd;
1037}
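Note: among the helpers above, bpf_object__check_endianness() determines the host byte order with the classic one-byte probe, then compares it against the object's EI_DATA tag. The probe in isolation:

    #include <stdio.h>

    int main(void)
    {
            static unsigned int const endian = 1;

            /* If the first byte in memory is 1, the host stores the low
             * byte first, i.e. it is little endian (ELFDATA2LSB). */
            if (*(unsigned char const *)&endian == 1)
                    printf("little endian host, expect ELFDATA2LSB objects\n");
            else
                    printf("big endian host, expect ELFDATA2MSB objects\n");
            return 0;
    }
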
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
new file mode 100644
index 000000000000..ea8adc206b62
--- /dev/null
+++ b/tools/lib/bpf/libbpf.h
@@ -0,0 +1,81 @@
1/*
2 * Common eBPF ELF object loading operations.
3 *
4 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
5 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
6 * Copyright (C) 2015 Huawei Inc.
7 */
8#ifndef __BPF_LIBBPF_H
9#define __BPF_LIBBPF_H
10
11#include <stdio.h>
12#include <stdbool.h>
13
14/*
15 * In include/linux/compiler-gcc.h, __printf is defined. However,
16 * it is better if libbpf.h doesn't depend on Linux header files.
17 * So instead of __printf, here we use the gcc attribute directly.
18 */
19typedef int (*libbpf_print_fn_t)(const char *, ...)
20 __attribute__((format(printf, 1, 2)));
21
22void libbpf_set_print(libbpf_print_fn_t warn,
23 libbpf_print_fn_t info,
24 libbpf_print_fn_t debug);
25
26/* Hide internal to user */
27struct bpf_object;
28
29struct bpf_object *bpf_object__open(const char *path);
30struct bpf_object *bpf_object__open_buffer(void *obj_buf,
31 size_t obj_buf_sz);
32void bpf_object__close(struct bpf_object *object);
33
34/* Load/unload object into/from kernel */
35int bpf_object__load(struct bpf_object *obj);
36int bpf_object__unload(struct bpf_object *obj);
37
38struct bpf_object *bpf_object__next(struct bpf_object *prev);
39#define bpf_object__for_each_safe(pos, tmp) \
40 for ((pos) = bpf_object__next(NULL), \
41 (tmp) = bpf_object__next(pos); \
42 (pos) != NULL; \
43 (pos) = (tmp), (tmp) = bpf_object__next(tmp))
44
45/* Accessors of bpf_program. */
46struct bpf_program;
47struct bpf_program *bpf_program__next(struct bpf_program *prog,
48 struct bpf_object *obj);
49
50#define bpf_object__for_each_program(pos, obj) \
51 for ((pos) = bpf_program__next(NULL, (obj)); \
52 (pos) != NULL; \
53 (pos) = bpf_program__next((pos), (obj)))
54
55typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
56 void *);
57
58int bpf_program__set_private(struct bpf_program *prog, void *priv,
59 bpf_program_clear_priv_t clear_priv);
60
61int bpf_program__get_private(struct bpf_program *prog,
62 void **ppriv);
63
64const char *bpf_program__title(struct bpf_program *prog, bool dup);
65
66int bpf_program__fd(struct bpf_program *prog);
67
68/*
69 * We don't need __attribute__((packed)) now since it is
70 * unnecessary for 'bpf_map_def' because they are all aligned.
71 * In addition, using it will trigger -Wpacked warning message,
72 * and will be treated as an error due to -Werror.
73 */
74struct bpf_map_def {
75 unsigned int type;
76 unsigned int key_size;
77 unsigned int value_size;
78 unsigned int max_entries;
79};
80
81#endif
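Note: the public interface above is already enough to open, load and enumerate an object. A hypothetical caller, with error handling trimmed and "foo_kprobe.o" standing in for a real eBPF object file:

    #include <stdio.h>
    #include "libbpf.h"

    int main(void)
    {
            struct bpf_object *obj;
            struct bpf_program *prog;

            obj = bpf_object__open("foo_kprobe.o");   /* placeholder path */
            if (!obj)
                    return 1;

            if (bpf_object__load(obj)) {
                    bpf_object__close(obj);
                    return 1;
            }

            bpf_object__for_each_program(prog, obj)
                    printf("section '%s' loaded, fd=%d\n",
                           bpf_program__title(prog, false),
                           bpf_program__fd(prog));

            bpf_object__close(obj);
            return 0;
    }
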
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index cc25f059ab3d..4d885934b919 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -418,7 +418,7 @@ static int func_map_init(struct pevent *pevent)
 }
 
 static struct func_map *
-find_func(struct pevent *pevent, unsigned long long addr)
+__find_func(struct pevent *pevent, unsigned long long addr)
 {
 	struct func_map *func;
 	struct func_map key;
@@ -434,6 +434,71 @@ find_func(struct pevent *pevent, unsigned long long addr)
 	return func;
 }
 
+struct func_resolver {
+	pevent_func_resolver_t	*func;
+	void			*priv;
+	struct func_map		map;
+};
+
+/**
+ * pevent_set_function_resolver - set an alternative function resolver
+ * @pevent: handle for the pevent
+ * @resolver: function to be used
+ * @priv: resolver function private state.
+ *
+ * Some tools may have already a way to resolve kernel functions, allow them to
+ * keep using it instead of duplicating all the entries inside
+ * pevent->funclist.
+ */
+int pevent_set_function_resolver(struct pevent *pevent,
+				 pevent_func_resolver_t *func, void *priv)
+{
+	struct func_resolver *resolver = malloc(sizeof(*resolver));
+
+	if (resolver == NULL)
+		return -1;
+
+	resolver->func = func;
+	resolver->priv = priv;
+
+	free(pevent->func_resolver);
+	pevent->func_resolver = resolver;
+
+	return 0;
+}
+
+/**
+ * pevent_reset_function_resolver - reset alternative function resolver
+ * @pevent: handle for the pevent
+ *
+ * Stop using whatever alternative resolver was set, use the default
+ * one instead.
+ */
+void pevent_reset_function_resolver(struct pevent *pevent)
+{
+	free(pevent->func_resolver);
+	pevent->func_resolver = NULL;
+}
+
+static struct func_map *
+find_func(struct pevent *pevent, unsigned long long addr)
+{
+	struct func_map *map;
+
+	if (!pevent->func_resolver)
+		return __find_func(pevent, addr);
+
+	map = &pevent->func_resolver->map;
+	map->mod = NULL;
+	map->addr = addr;
+	map->func = pevent->func_resolver->func(pevent->func_resolver->priv,
+						&map->addr, &map->mod);
+	if (map->func == NULL)
+		return NULL;
+
+	return map;
+}
+
 /**
  * pevent_find_function - find a function by a given address
  * @pevent: handle for the pevent
@@ -1680,6 +1745,9 @@ process_cond(struct event_format *event, struct print_arg *top, char **tok)
 	type = process_arg(event, left, &token);
 
  again:
+	if (type == EVENT_ERROR)
+		goto out_free;
+
 	/* Handle other operations in the arguments */
 	if (type == EVENT_OP && strcmp(token, ":") != 0) {
 		type = process_op(event, left, &token);
@@ -1939,6 +2007,12 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
1939 goto out_warn_free; 2007 goto out_warn_free;
1940 2008
1941 type = process_arg_token(event, right, tok, type); 2009 type = process_arg_token(event, right, tok, type);
2010 if (type == EVENT_ERROR) {
2011 free_arg(right);
2012 /* token was freed in process_arg_token() via *tok */
2013 token = NULL;
2014 goto out_free;
2015 }
1942 2016
1943 if (right->type == PRINT_OP && 2017 if (right->type == PRINT_OP &&
1944 get_op_prio(arg->op.op) < get_op_prio(right->op.op)) { 2018 get_op_prio(arg->op.op) < get_op_prio(right->op.op)) {
@@ -4754,6 +4828,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
4754 case 'z': 4828 case 'z':
4755 case 'Z': 4829 case 'Z':
4756 case '0' ... '9': 4830 case '0' ... '9':
4831 case '-':
4757 goto cont_process; 4832 goto cont_process;
4758 case 'p': 4833 case 'p':
4759 if (pevent->long_size == 4) 4834 if (pevent->long_size == 4)
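
For context, an editorial note: the new '-' case lets pretty_print() step over
left-aligned field widths in an event's print format, the same notation that
standard printf uses:

    #include <stdio.h>

    int main(void)
    {
            printf("[%-8s]\n", "perf");  /* prints "[perf    ]": '-' means left-align */
            return 0;
    }
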
@@ -6564,6 +6639,7 @@ void pevent_free(struct pevent *pevent)
6564 free(pevent->trace_clock); 6639 free(pevent->trace_clock);
6565 free(pevent->events); 6640 free(pevent->events);
6566 free(pevent->sort_events); 6641 free(pevent->sort_events);
6642 free(pevent->func_resolver);
6567 6643
6568 free(pevent); 6644 free(pevent);
6569} 6645}
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 063b1971eb35..204befb05a17 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -453,6 +453,10 @@ struct cmdline_list;
453struct func_map; 453struct func_map;
454struct func_list; 454struct func_list;
455struct event_handler; 455struct event_handler;
456struct func_resolver;
457
458typedef char *(pevent_func_resolver_t)(void *priv,
459 unsigned long long *addrp, char **modp);
456 460
457struct pevent { 461struct pevent {
458 int ref_count; 462 int ref_count;
@@ -481,6 +485,7 @@ struct pevent {
481 int cmdline_count; 485 int cmdline_count;
482 486
483 struct func_map *func_map; 487 struct func_map *func_map;
488 struct func_resolver *func_resolver;
484 struct func_list *funclist; 489 struct func_list *funclist;
485 unsigned int func_count; 490 unsigned int func_count;
486 491
@@ -611,6 +616,9 @@ enum trace_flag_type {
611 TRACE_FLAG_SOFTIRQ = 0x10, 616 TRACE_FLAG_SOFTIRQ = 0x10,
612}; 617};
613 618
619int pevent_set_function_resolver(struct pevent *pevent,
620 pevent_func_resolver_t *func, void *priv);
621void pevent_reset_function_resolver(struct pevent *pevent);
614int pevent_register_comm(struct pevent *pevent, const char *comm, int pid); 622int pevent_register_comm(struct pevent *pevent, const char *comm, int pid);
615int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock); 623int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock);
616int pevent_register_function(struct pevent *pevent, char *name, 624int pevent_register_function(struct pevent *pevent, char *name,
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 09db62ba5786..3d1bb802dbf4 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -29,3 +29,4 @@ config.mak.autogen
29*.pyc 29*.pyc
30*.pyo 30*.pyo
31.config-detected 31.config-detected
32util/intel-pt-decoder/inat-tables.c
diff --git a/tools/perf/Build b/tools/perf/Build
index b77370ef7005..72237455b400 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -35,6 +35,7 @@ paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))"
35CFLAGS_builtin-help.o += $(paths) 35CFLAGS_builtin-help.o += $(paths)
36CFLAGS_builtin-timechart.o += $(paths) 36CFLAGS_builtin-timechart.o += $(paths)
37CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE 37CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE
38CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))"
38 39
39libperf-y += util/ 40libperf-y += util/
40libperf-y += arch/ 41libperf-y += arch/
diff --git a/tools/perf/Documentation/intel-bts.txt b/tools/perf/Documentation/intel-bts.txt
new file mode 100644
index 000000000000..8bdc93bd7fdb
--- /dev/null
+++ b/tools/perf/Documentation/intel-bts.txt
@@ -0,0 +1,86 @@
1Intel Branch Trace Store
2========================
3
4Overview
5========
6
7Intel BTS could be regarded as a predecessor to Intel PT and has some
8similarities because it can also identify every branch a program takes. A
9notable difference is that Intel BTS has no timing information and, as a
10consequence, the present implementation is limited to per-thread recording.
11
12While decoding Intel BTS does not require walking the object code, the object
13code is still needed to pair up calls and returns correctly; consequently, much
14of the Intel PT documentation also applies to Intel BTS. Refer to the Intel PT
15documentation and consider that the PMU 'intel_bts' can usually be used in
16place of 'intel_pt' in the examples provided, with the proviso that per-thread
17recording must also be stipulated i.e. the --per-thread option for
18'perf record'.
19
20
21perf record
22===========
23
24new event
25---------
26
27The Intel BTS kernel driver creates a new PMU for Intel BTS. The perf record
28option is:
29
30 -e intel_bts//
31
32Currently Intel BTS is limited to per-thread tracing so the --per-thread option
33is also needed.
34
35
36snapshot option
37---------------
38
39The snapshot option is the same as for Intel PT (refer to the Intel PT documentation).
40
41
42auxtrace mmap size option
43-------------------------
44
45The mmap size option is the same as for Intel PT (refer to the Intel PT documentation).
46
47
48perf script
49===========
50
51By default, perf script will decode trace data found in the perf.data file.
52This can be further controlled by option --itrace. The --itrace option is
53the same as for Intel PT (refer to the Intel PT documentation) except that neither
54"instructions" events nor "transactions" events (and consequently call
55chains) are supported.
56
57To disable trace decoding entirely, use the option --no-itrace.
58
59
60dump option
61-----------
62
63perf script has an option (-D) to "dump" the events i.e. display the binary
64data.
65
66When -D is used, Intel BTS packets are displayed.
67
68To disable the display of Intel BTS packets, combine the -D option with
69--no-itrace.
70
71
72perf report
73===========
74
75By default, perf report will decode trace data found in the perf.data file.
76This can be further controlled by the new option --itrace, exactly as in
77perf script.
78
79
80perf inject
81===========
82
83perf inject also accepts the --itrace option, in which case tracing data is
84removed and replaced with the synthesized events, e.g.
85
86 perf inject --itrace -i perf.data -o perf.data.new
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
new file mode 100644
index 000000000000..4a0501d7a3b4
--- /dev/null
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -0,0 +1,766 @@
1Intel Processor Trace
2=====================
3
4Overview
5========
6
7Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
8collects information about software execution such as control flow, execution
9modes and timings and formats it into highly compressed binary packets.
10Technical details are documented in the Intel 64 and IA-32 Architectures
11Software Developer Manuals, Chapter 36 Intel Processor Trace.
12
13Intel PT is first supported in Intel Core M and 5th generation Intel Core
14processors that are based on the Intel micro-architecture code name Broadwell.
15
16Trace data is collected by 'perf record' and stored within the perf.data file.
17See below for options to 'perf record'.
18
19Trace data must be 'decoded' which involves walking the object code and matching
20the trace data packets. For example a TNT packet only tells whether a
21conditional branch was taken or not taken, so to make use of that packet the
22decoder must know precisely which instruction was being executed.
23
24Decoding is done on-the-fly. The decoder outputs samples in the same format as
25samples output by perf hardware events, for example as though the "instructions"
26or "branches" events had been recorded. Presently 3 tools support this:
27'perf script', 'perf report' and 'perf inject'. See below for more information
28on using those tools.
29
30The main distinguishing feature of Intel PT is that the decoder can determine
31the exact flow of software execution. Intel PT can be used to understand why
32and how software got to a certain point, or behaved a certain way. The
33software does not have to be recompiled, so Intel PT works with debug or release
34builds; however, the executed images are needed - which makes use in JIT-compiled
35environments, or with self-modifying code, a challenge. Also, symbols need to be
36provided to make sense of addresses.
37
38A limitation of Intel PT is that it produces huge amounts of trace data
39(hundreds of megabytes per second per core) which takes a long time to decode,
40for example two or three orders of magnitude longer than it took to collect.
41Another limitation is the performance impact of tracing, something that will
42vary depending on the use-case and architecture.
43
44
45Quickstart
46==========
47
48It is important to start small. That is because it is easy to capture vastly
49more data than can possibly be processed.
50
51The simplest thing to do with Intel PT is userspace profiling of small programs.
52Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
53
54 perf record -e intel_pt//u ls
55
56And profiled with 'perf report' e.g.
57
58 perf report
59
60Tracing kernel space as well presents a problem, namely kernel self-modifying
61code. A fairly good kernel image is available in /proc/kcore but to get an
62accurate image a copy of /proc/kcore needs to be made under the same conditions
63as the data capture. A script perf-with-kcore can do that, but beware that the
64script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
65locally from the source tree you can do:
66
67 ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
68
69which will create a directory named 'pt_ls' and put the perf.data file and
70copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. The
71'perf report' invocation then becomes:
72
73 ~/libexec/perf-core/perf-with-kcore report pt_ls
74
75Because samples are synthesized after-the-fact, the sampling period can be
76selected for reporting. e.g. sample every microsecond
77
78 ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
79
80See the sections below for more information about the --itrace option.
81
82Beware that the smaller the period, the more samples are produced, and the
83longer it takes to process them.
84
85Also note that the coarseness of Intel PT timing information will start to
86distort the statistical value of the sampling as the sampling period becomes
87smaller.
88
89To represent software control flow, "branches" samples are produced. By default
90a branch sample is synthesized for every single branch. To get an idea of what
91data is available you can use the 'perf script' tool with no parameters, which
92will list all the samples.
93
94 perf record -e intel_pt//u ls
95 perf script
96
97An interesting field that is not printed by default is 'flags' which can be
98displayed as follows:
99
100 perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags
101
102The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
103system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
104in transaction, respectively.
105
106While it is possible to create scripts to analyze the data, an alternative
107approach is to export the data to a PostgreSQL database. Refer to
108script export-to-postgresql.py for more details, and to script
109call-graph-from-postgresql.py for an example of using the database.
110
111As mentioned above, it is easy to capture too much data. One way to limit the
112data captured is to use 'snapshot' mode, which is explained further below.
113Refer to the 'new snapshot option' and 'Intel PT modes of operation' sections.
114
115Another problem that will be experienced is decoder errors. They can be caused
116by an inability to access the executed image, self-modifying or JIT-ed code, or an
117inability to match side-band information (such as context switches and mmaps)
118which results in the decoder not knowing what code was executed.
119
120There is also the problem of perf not being able to copy the data fast enough,
121resulting in data being lost because the buffer was full. See 'Buffer handling' below
122for more details.
123
124
125perf record
126===========
127
128new event
129---------
130
131The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are
132selected by providing the PMU name followed by the "config" separated by slashes.
133An enhancement has been made to allow default "config" e.g. the option
134
135 -e intel_pt//
136
137will use a default config value. Currently that is the same as
138
139 -e intel_pt/tsc,noretcomp=0/
140
141which is the same as
142
143 -e intel_pt/tsc=1,noretcomp=0/
144
145Note there are now new config terms - see section 'config terms' further below.
146
147The config terms are listed in /sys/devices/intel_pt/format. They are bit
148fields within the config member of the struct perf_event_attr which is
149passed to the kernel by the perf_event_open system call. They correspond to bit
150fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
151
152 $ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
153 /sys/bus/event_source/devices/intel_pt/format/cyc:config:1
154 /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
155 /sys/bus/event_source/devices/intel_pt/format/mtc:config:9
156 /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
157 /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
158 /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
159 /sys/bus/event_source/devices/intel_pt/format/tsc:config:10
160
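
As an editorial illustration, a config value is just these bit fields OR-ed
together; selecting tsc alone (config bit 10) gives the config of 0x400 seen in
the perf_event_attr dumps below:

    unsigned long long config = 0;

    config |= 1ULL << 10;   /* tsc=1: bit 10, i.e. config == 0x400 so far */
    /* other terms OR in their own fields, e.g. mtc_period=3 in bits 14-17: */
    config |= 3ULL << 14;
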
161Note that the default config must be overridden for each term i.e.
162
163 -e intel_pt/noretcomp=0/
164
165is the same as:
166
167 -e intel_pt/tsc=1,noretcomp=0/
168
169So, to disable TSC packets use:
170
171 -e intel_pt/tsc=0/
172
173It is also possible to specify the config value explicitly:
174
175 -e intel_pt/config=0x400/
176
177Note that, as with all events, the event is suffixed with event modifiers:
178
179 u userspace
180 k kernel
181 h hypervisor
182 G guest
183 H host
184 p precise ip
185
186'h', 'G' and 'H' are for virtualization, which is not supported by Intel PT.
187'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
188meaningful for Intel PT.
189
190perf_event_attr is displayed if the -vv option is used e.g.
191
192 ------------------------------------------------------------
193 perf_event_attr:
194 type 6
195 size 112
196 config 0x400
197 { sample_period, sample_freq } 1
198 sample_type IP|TID|TIME|CPU|IDENTIFIER
199 read_format ID
200 disabled 1
201 inherit 1
202 exclude_kernel 1
203 exclude_hv 1
204 enable_on_exec 1
205 sample_id_all 1
206 ------------------------------------------------------------
207 sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
208 sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
209 sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
210 sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
211 ------------------------------------------------------------
212
213
214config terms
215------------
216
217The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
218Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
219Some of the features are reflected in new config terms. All the config terms are
220described below; the period and threshold conversions they use are sketched in C after the list.
221
222tsc Always supported. Produces TSC timestamp packets to provide
223 timing information. In some cases it is possible to decode
224 without timing information, for example a per-thread context
225 that does not overlap executable memory maps.
226
227 The default config selects tsc (i.e. tsc=1).
228
229noretcomp Always supported. Disables "return compression" so a TIP packet
230 is produced when a function returns. Causes more packets to be
231 produced but might make decoding more reliable.
232
233 The default config does not select noretcomp (i.e. noretcomp=0).
234
235psb_period Allows the frequency of PSB packets to be specified.
236
237 The PSB packet is a synchronization packet that provides a
238 starting point for decoding or recovery from errors.
239
240 Support for psb_period is indicated by:
241
242 /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
243
244 which contains "1" if the feature is supported and "0"
245 otherwise.
246
247 Valid values are given by:
248
249 /sys/bus/event_source/devices/intel_pt/caps/psb_periods
250
251 which contains a hexadecimal value, the bits of which represent
252 valid values e.g. bit 2 set means value 2 is valid.
253
254 The psb_period value is converted to the approximate number of
255 trace bytes between PSB packets as:
256
257 2 ^ (value + 11)
258
259 e.g. value 3 means 16KiB between PSBs
260
261 If an invalid value is entered, the error message
262 will give a list of valid values e.g.
263
264 $ perf record -e intel_pt/psb_period=15/u uname
265 Invalid psb_period for intel_pt. Valid values are: 0-5
266
267 If MTC packets are selected, the default config selects a value
268 of 3 (i.e. psb_period=3) or the nearest lower value that is
269 supported (0 is always supported). Otherwise the default is 0.
270
271 If decoding is expected to be reliable and the buffer is large
272 then a large PSB period can be used.
273
274 Because a TSC packet is produced with PSB, the PSB period can
275 also affect the granularity of timing information in the absence
276 of MTC or CYC.
277
278mtc Produces MTC timing packets.
279
280 MTC packets provide finer grain timestamp information than TSC
281 packets. MTC packets record time using the hardware crystal
282 clock (CTC) which is related to TSC packets using a TMA packet.
283
284 Support for this feature is indicated by:
285
286 /sys/bus/event_source/devices/intel_pt/caps/mtc
287
288 which contains "1" if the feature is supported and
289 "0" otherwise.
290
291 The frequency of MTC packets can also be specified - see
292 mtc_period below.
293
294mtc_period Specifies how frequently MTC packets are produced - see mtc
295 above for how to determine if MTC packets are supported.
296
297 Valid values are given by:
298
299 /sys/bus/event_source/devices/intel_pt/caps/mtc_periods
300
301 which contains a hexadecimal value, the bits of which represent
302 valid values e.g. bit 2 set means value 2 is valid.
303
304 The mtc_period value is converted to the MTC frequency as:
305
306 CTC-frequency / (2 ^ value)
307
308 e.g. value 3 means one eighth of CTC-frequency
309
310 Where CTC is the hardware crystal clock, the frequency of which
311 can be related to TSC via values provided in cpuid leaf 0x15.
312
313 If an invalid value is entered, the error message
314 will give a list of valid values e.g.
315
316 $ perf record -e intel_pt/mtc_period=15/u uname
317 Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
318
319 The default value is 3 or the nearest lower value
320 that is supported (0 is always supported).
321
322cyc Produces CYC timing packets.
323
324 CYC packets provide even finer grain timestamp information than
325 MTC and TSC packets. A CYC packet contains the number of CPU
326 cycles since the last CYC packet. Unlike MTC and TSC packets,
327 CYC packets are only sent when another packet is also sent.
328
329 Support for this feature is indicated by:
330
331 /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
332
333 which contains "1" if the feature is supported and
334 "0" otherwise.
335
336 The number of CYC packets produced can be reduced by specifying
337 a threshold - see cyc_thresh below.
338
339cyc_thresh Specifies how frequently CYC packets are produced - see cyc
340 above for how to determine if CYC packets are supported.
341
342 Valid cyc_thresh values are given by:
343
344 /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
345
346 which contains a hexadecimal value, the bits of which represent
347 valid values e.g. bit 2 set means value 2 is valid.
348
349 The cyc_thresh value represents the minimum number of CPU cycles
350 that must have passed before a CYC packet can be sent. The
351 number of CPU cycles is:
352
353 2 ^ (value - 1)
354
355 e.g. value 4 means 8 CPU cycles must pass before a CYC packet
356 can be sent. Note a CYC packet is still only sent when another
357 packet is sent, not, e.g., at every 8 CPU cycles.
358
359 If an invalid value is entered, the error message
360 will give a list of valid values e.g.
361
362 $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
363 Invalid cyc_thresh for intel_pt. Valid values are: 0-12
364
365 CYC packets are not requested by default.
366
367no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR.
368
369 It stops the driver from resetting the byte count to zero whenever
370 the trace is enabled (for example on context switches), which in
371 turn results in no PSB being forced. However, some processors
372 will produce a PSB anyway.
373
374 In any case, there is still a PSB when the trace is enabled for
375 the first time.
376
377 no_force_psb can be used to slightly decrease the trace size but
378 may make it harder for the decoder to recover from errors.
379
380 no_force_psb is not selected by default.
381
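
A small C sketch (editorial, not part of the patch) of the period and threshold
conversions described above:

    #include <stdio.h>

    /* psb_period: approximate trace bytes between PSB packets */
    static unsigned long long psb_bytes(unsigned int value)
    {
            return 1ULL << (value + 11);             /* 2 ^ (value + 11) */
    }

    /* mtc_period: MTC frequency as a fraction of the CTC frequency */
    static double mtc_freq(double ctc_freq, unsigned int value)
    {
            return ctc_freq / (double)(1U << value); /* CTC / 2 ^ value */
    }

    /* cyc_thresh: minimum CPU cycles before another CYC packet */
    static unsigned long long cyc_cycles(unsigned int value)
    {
            /* 2 ^ (value - 1); value 0 is assumed to mean no threshold */
            return value ? 1ULL << (value - 1) : 0;
    }

    int main(void)
    {
            printf("%llu\n", psb_bytes(3));     /* 16384, i.e. 16KiB */
            printf("%g\n", mtc_freq(24e6, 3));  /* 1/8 of an assumed 24 MHz CTC */
            printf("%llu\n", cyc_cycles(4));    /* 8 cycles */
            return 0;
    }
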
382
383new snapshot option
384-------------------
385
386The difference between full trace and snapshot, from the kernel's perspective, is
387that in full trace we don't overwrite trace data that the user hasn't collected
388yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
389the trace run and overwrite older data in the buffer. That way, whenever something
390interesting happens, we can stop the trace and grab a snapshot of what was going on
391around that interesting moment.
392
393To select snapshot mode a new option has been added:
394
395 -S
396
397Optionally it can be followed by the snapshot size e.g.
398
399 -S0x100000
400
401The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size
402nor snapshot size is specified, then the default is 4MiB for privileged users
403(or if /proc/sys/kernel/perf_event_paranoid < 0) and 128KiB for unprivileged users.
404If an unprivileged user does not specify mmap pages, the mmap pages will be
405reduced as described in the 'new auxtrace mmap size option' section below.
406
407The snapshot size is displayed if the option -vv is used e.g.
408
409 Intel PT snapshot size: %zu
410
411
412new auxtrace mmap size option
413-----------------------------
414
415Intel PT buffer size is specified by an addition to the -m option e.g.
416
417 -m,16
418
419selects a buffer size of 16 pages i.e. 64KiB.
420
421Note that the existing functionality of -m is unchanged. The auxtrace mmap size
422is specified by the optional addition of a comma and the value.
423
424The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
425(or if /proc/sys/kernel/perf_event_paranoid < 0) and 128KiB for unprivileged users.
426If an unprivileged user does not specify mmap pages, the mmap pages will be
427reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
428user is likely to get an error as they exceed their mlock limit (Max locked
429memory as shown in /proc/self/limits). Note that perf does not count the first
430512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
431against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
432their mlock limit (which defaults to 64KiB but is not multiplied by the number
433of cpus).
434
435In full-trace mode, powers of two are allowed for buffer size, with a minimum
436size of 2 pages. In snapshot mode, it is the same but the minimum size is
4371 page.
438
439The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
440
441 mmap length 528384
442 auxtrace mmap length 4198400
443
444
445Intel PT modes of operation
446---------------------------
447
448Intel PT can be used in 2 modes:
449 full-trace mode
450 snapshot mode
451
452Full-trace mode traces continuously e.g.
453
454 perf record -e intel_pt//u uname
455
456Snapshot mode captures the available data when a signal is sent e.g.
457
458 perf record -v -e intel_pt//u -S ./loopy 1000000000 &
459 [1] 11435
460 kill -USR2 11435
461 Recording AUX area tracing snapshot
462
463Note that the signal sent is SIGUSR2.
464Note that "Recording AUX area tracing snapshot" is displayed because the -v
465option is used.
466
467The 2 modes cannot be used together.
468
469
470Buffer handling
471---------------
472
473There may be buffer limitations (i.e. single ToPa entry) which means that actual
474buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
475provide other sizes, and in particular an arbitrarily large size, multiple
476buffers are logically concatenated. However, an interrupt must be used to switch
477between buffers. That has two potential problems:
478 a) the interrupt may not be handled in time so that the current buffer
479 becomes full and some trace data is lost.
480 b) the interrupts may slow the system and affect the performance
481 results.
482
483If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
484which the tools report as an error.
485
486In full-trace mode, the driver waits for data to be copied out before allowing
487the (logical) buffer to wrap around. If data is not copied out quickly enough,
488again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
489wait, the intel_pt event gets disabled. Because it is difficult to know when
490that happens, perf tools always re-enable the intel_pt event after copying out
491data.
492
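
An editorial sketch of spotting that condition in the event stream; the record
layout and flag name follow linux/perf_event.h, which is an assumption about
the reader's kernel headers rather than part of this document:

    #include <linux/perf_event.h>

    /* body of a PERF_RECORD_AUX record, per linux/perf_event.h */
    struct aux_event {
            struct perf_event_header header;
            __u64 aux_offset;
            __u64 aux_size;
            __u64 flags;
    };

    static int aux_was_truncated(const struct aux_event *ev)
    {
            return (ev->flags & PERF_AUX_FLAG_TRUNCATED) != 0;
    }
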
493
494Intel PT and build ids
495----------------------
496
497By default "perf record" post-processes the event stream to find all build ids
498for executables for all addresses sampled. Deliberately, Intel PT is not
499decoded for that purpose (it would take too long). Instead the build ids for
500all executables encountered (due to mmap, comm or task events) are included
501in the perf.data file.
502
503To see buildids included in the perf.data file use the command:
504
505 perf buildid-list
506
507If the perf.data file contains Intel PT data, that is the same as:
508
509 perf buildid-list --with-hits
510
511
512Snapshot mode and event disabling
513---------------------------------
514
515In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
516namely PERF_EVENT_IOC_DISABLE. However, doing that can also disable the
517collection of side-band information. In order to prevent that, a dummy
518software event has been introduced that permits tracking events (like mmaps) to
519continue to be recorded while intel_pt is disabled. That is important to ensure
520there is complete side-band information to allow the decoding of subsequent
521snapshots.
522
523A test has been created for that. To find the test:
524
525 perf test list
526 ...
527 23: Test using a dummy software event to keep tracking
528
529To run the test:
530
531 perf test 23
532 23: Test using a dummy software event to keep tracking : Ok
533
534
535perf record modes (nothing new here)
536------------------------------------
537
538perf record essentially operates in one of three modes:
539 per thread
540 per cpu
541 workload only
542
543"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
544workload).
545"per cpu" is selected by -C or -a.
546"workload only" mode is selected by not using the other options but providing a
547command to run (i.e. the workload).
548
549In per-thread mode an exact list of threads is traced. There is no inheritance.
550Each thread has its own event buffer.
551
552In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
553option, or processes selected with -p or -u) are traced. Each cpu has its own
554buffer. Inheritance is allowed.
555
556In workload-only mode, the workload is traced but with per-cpu buffers.
557Inheritance is allowed. Note that you can now trace a workload in per-thread
558mode by using the --per-thread option.
559
560
561Privileged vs non-privileged users
562----------------------------------
563
564Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
565have memory limits imposed upon them. That affects what buffer sizes they can
566have as outlined above.
567
568Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
569not permitted to use tracepoints, which means there is insufficient side-band
570information to decode Intel PT in per-cpu mode, and potentially workload-only
571mode too if the workload creates new processes.
572
573Note also that, to use tracepoints, read access to debugfs is required. So if
574debugfs is not mounted or the user does not have read access, it will again not
575be possible to decode Intel PT in per-cpu mode.
576
577
578sched_switch tracepoint
579-----------------------
580
581The sched_switch tracepoint is used to provide side-band data for Intel PT
582decoding. sched_switch events are automatically added. e.g. the second event
583shown below
584
585 $ perf record -vv -e intel_pt//u uname
586 ------------------------------------------------------------
587 perf_event_attr:
588 type 6
589 size 112
590 config 0x400
591 { sample_period, sample_freq } 1
592 sample_type IP|TID|TIME|CPU|IDENTIFIER
593 read_format ID
594 disabled 1
595 inherit 1
596 exclude_kernel 1
597 exclude_hv 1
598 enable_on_exec 1
599 sample_id_all 1
600 ------------------------------------------------------------
601 sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
602 sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
603 sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
604 sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
605 ------------------------------------------------------------
606 perf_event_attr:
607 type 2
608 size 112
609 config 0x108
610 { sample_period, sample_freq } 1
611 sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
612 read_format ID
613 inherit 1
614 sample_id_all 1
615 exclude_guest 1
616 ------------------------------------------------------------
617 sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
618 sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
619 sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
620 sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
621 ------------------------------------------------------------
622 perf_event_attr:
623 type 1
624 size 112
625 config 0x9
626 { sample_period, sample_freq } 1
627 sample_type IP|TID|TIME|IDENTIFIER
628 read_format ID
629 disabled 1
630 inherit 1
631 exclude_kernel 1
632 exclude_hv 1
633 mmap 1
634 comm 1
635 enable_on_exec 1
636 task 1
637 sample_id_all 1
638 mmap2 1
639 comm_exec 1
640 ------------------------------------------------------------
641 sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
642 sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
643 sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
644 sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
645 mmap size 528384B
646 AUX area mmap length 4194304
647 perf event ring buffer mmapped per cpu
648 Synthesizing auxtrace information
649 Linux
650 [ perf record: Woken up 1 times to write data ]
651 [ perf record: Captured and wrote 0.042 MB perf.data ]
652
653Note, the sched_switch event is only added if the user is permitted to use it
654and only in per-cpu mode.
655
656Note also, the sched_switch event is only added if TSC packets are requested.
657That is because, in the absence of timing information, the sched_switch events
658cannot be matched against the Intel PT trace.
659
660
661perf script
662===========
663
664By default, perf script will decode trace data found in the perf.data file.
665This can be further controlled by the new option --itrace.
666
667
668New --itrace option
669-------------------
670
671Having no option is the same as
672
673 --itrace
674
675which, in turn, is the same as
676
677 --itrace=ibxe
678
679The letters are:
680
681 i synthesize "instructions" events
682 b synthesize "branches" events
683 x synthesize "transactions" events
684 c synthesize branches events (calls only)
685 r synthesize branches events (returns only)
686 e synthesize tracing error events
687 d create a debug log
688 g synthesize a call chain (use with i or x)
689
690"Instructions" events look like they were recorded by "perf record -e
691instructions".
692
693"Branches" events look like they were recorded by "perf record -e branches". "c"
694and "r" can be combined to get calls and returns.
695
696"Transactions" events correspond to the start or end of transactions. The
697'flags' field can be used in perf script to determine whether the event is a
698transaction start, commit or abort.
699
700Error events are new. They show where the decoder lost the trace. Error events
701are quite important. Users must know if what they are seeing is a complete
702picture or not.
703
704The "d" option will cause the creation of a file "intel_pt.log" containing all
705decoded packets and instructions. Note that this option slows down the decoder
706and that the resulting file may be very large.
707
708In addition, the period of the "instructions" event can be specified. e.g.
709
710 --itrace=i10us
711
712sets the period to 10us i.e. one instruction sample is synthesized for each 10
713microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
714"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
715
716"ms", "us" and "ns" are converted to TSC ticks.
717
718The timing information included with Intel PT does not give the time of every
719instruction. Consequently, for the purpose of sampling, the decoder estimates
720the time since the last timing packet based on 1 tick per instruction. The time
721on the sample is *not* adjusted and reflects the last known value of TSC.
722
723For Intel PT, the default period is 100us.
724
725Also the call chain size (default 16, max. 1024) for instructions or
726transactions events can be specified. e.g.
727
728 --itrace=ig32
729 --itrace=xg32
730
731To disable trace decoding entirely, use the option --no-itrace.
732
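
To make the unit conversion concrete, an editorial sketch (the TSC frequency is
an assumed input here; perf itself derives it from the recorded data):

    /* an --itrace period given in microseconds, expressed in TSC ticks */
    static unsigned long long period_us_to_tsc_ticks(unsigned long long us,
                                                     unsigned long long tsc_hz)
    {
            return us * tsc_hz / 1000000ULL;
    }

    /* e.g. 10us at an assumed 2.4 GHz TSC -> 24000 ticks */
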
733
734dump option
735-----------
736
737perf script has an option (-D) to "dump" the events i.e. display the binary
738data.
739
740When -D is used, Intel PT packets are displayed. The packet decoder does not
741pay attention to PSB packets, but just decodes the bytes - so the packets seen
742by the actual decoder may not be identical in places where the data is corrupt.
743One example of that would be when the buffer-switching interrupt has been too
744slow, and the buffer has been filled completely. In that case, the last packet
745in the buffer might be truncated and immediately followed by a PSB as the trace
746continues in the next buffer.
747
748To disable the display of Intel PT packets, combine the -D option with
749--no-itrace.
750
751
752perf report
753===========
754
755By default, perf report will decode trace data found in the perf.data file.
756This can be further controlled by the new option --itrace, exactly as in
757perf script, with the exception that the default is --itrace=igxe.
758
759
760perf inject
761===========
762
763perf inject also accepts the --itrace option, in which case tracing data is
764removed and replaced with the synthesized events, e.g.
765
766 perf inject --itrace -i perf.data -o perf.data.new
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
new file mode 100644
index 000000000000..2ff946677e3b
--- /dev/null
+++ b/tools/perf/Documentation/itrace.txt
@@ -0,0 +1,22 @@
1 i synthesize instructions events
2 b synthesize branches events
3 c synthesize branches events (calls only)
4 r synthesize branches events (returns only)
5 x synthesize transactions events
6 e synthesize error events
7 d create a debug log
8 g synthesize a call chain (use with i or x)
9
10 The default is all events i.e. the same as --itrace=ibxe
11
12 In addition, the period (default 100000) for instructions events
13 can be specified in units of:
14
15 i instructions
16 t ticks
17 ms milliseconds
18 us microseconds
19 ns nanoseconds (default)
20
21 Also the call chain size (default 16, max. 1024) for instructions or
22 transactions events can be specified.
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index bf3d0644bf10..ab632d9fbd7d 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -216,6 +216,10 @@ Suite for evaluating parallel wake calls.
216*requeue*:: 216*requeue*::
217Suite for evaluating requeue calls. 217Suite for evaluating requeue calls.
218 218
219*lock-pi*::
220Suite for evaluating futex lock_pi calls.
221
222
219SEE ALSO 223SEE ALSO
220-------- 224--------
221linkperf:perf[1] 225linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index b876ae312699..0c721c3e37e1 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -48,28 +48,7 @@ OPTIONS
48 Decode Instruction Tracing data, replacing it with synthesized events. 48 Decode Instruction Tracing data, replacing it with synthesized events.
49 Options are: 49 Options are:
50 50
51 i synthesize instructions events 51include::itrace.txt[]
52 b synthesize branches events
53 c synthesize branches events (calls only)
54 r synthesize branches events (returns only)
55 x synthesize transactions events
56 e synthesize error events
57 d create a debug log
58 g synthesize a call chain (use with i or x)
59
60 The default is all events i.e. the same as --itrace=ibxe
61
62 In addition, the period (default 100000) for instructions events
63 can be specified in units of:
64
65 i instructions
66 t ticks
67 ms milliseconds
68 us microseconds
69 ns nanoseconds (default)
70
71 Also the call chain size (default 16, max. 1024) for instructions or
72 transactions events can be specified.
73 52
74SEE ALSO 53SEE ALSO
75-------- 54--------
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 9b9d9d086680..347a27322ed8 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -45,6 +45,21 @@ OPTIONS
45 param1 and param2 are defined as formats for the PMU in: 45 param1 and param2 are defined as formats for the PMU in:
46 /sys/bus/event_sources/devices/<pmu>/format/* 46 /sys/bus/event_sources/devices/<pmu>/format/*
47 47
48 There are also some params which are not defined in .../<pmu>/format/*.
49 These params can be used to override default config values per event.
50 Here is a list of the params.
51 - 'period': Set event sampling period
52 - 'freq': Set event sampling frequency
53 - 'time': Disable/enable time stamping. Acceptable values are 1 to
54 enable time stamping and 0 to disable it.
55 The default is 1.
56 - 'call-graph': Disable/enable callgraph. Acceptable strings are "fp" for
57 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
58 "no" to disable the callgraph.
59 - 'stack-size': user stack size for dwarf mode
60 Note: If the user explicitly sets options that conflict with the params,
61 the values set by the params will be overridden (an illustrative invocation follows).
62
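An illustrative invocation combining these params (editorial; assumes the 'cpu'
PMU exposes an 'instructions' event alias, as common x86 systems do):

    perf record -e cpu/instructions,period=20000,call-graph=fp/ -- ./workload
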
48 - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' 63 - a hardware breakpoint event in the form of '\mem:addr[/len][:access]'
49 where addr is the address in memory you want to break in. 64 where addr is the address in memory you want to break in.
50 Access is the memory access type (read, write, execute) it can 65 Access is the memory access type (read, write, execute) it can
@@ -61,7 +76,16 @@ OPTIONS
61 "perf report" to view group events together. 76 "perf report" to view group events together.
62 77
63--filter=<filter>:: 78--filter=<filter>::
64 Event filter. 79 Event filter. This option should follow an event selector (-e) which
80 selects tracepoint event(s). Multiple '--filter' options are combined
81 using '&&'.
82
83--exclude-perf::
84 Don't record events issued by perf itself. This option should follow
85 an event selector (-e) which selects tracepoint event(s). It adds a
86 filter expression 'common_pid != $PERFPID' to filters. If other
87 '--filter' exists, the new filter expression will be combined with
88 them by '&&'.
65 89
66-a:: 90-a::
67--all-cpus:: 91--all-cpus::
@@ -276,6 +300,10 @@ When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
276because the file may be huge. A time out is needed in such cases. 300because the file may be huge. A time out is needed in such cases.
277This option sets the time out limit. The default value is 500 ms. 301This option sets the time out limit. The default value is 500 ms.
278 302
303--switch-events::
304Record context switch events i.e. events of type PERF_RECORD_SWITCH or
305PERF_RECORD_SWITCH_CPU_WIDE.
306
279SEE ALSO 307SEE ALSO
280-------- 308--------
281linkperf:perf-stat[1], linkperf:perf-list[1] 309linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index c33b69f3374f..9c7981bfddad 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -81,6 +81,8 @@ OPTIONS
81 - cpu: cpu number the task ran at the time of sample 81 - cpu: cpu number the task ran at the time of sample
82 - srcline: filename and line number executed at the time of sample. The 82 - srcline: filename and line number executed at the time of sample. The
83 DWARF debugging info must be provided. 83 DWARF debugging info must be provided.
84 - srcfile: file name of the source file of the sample. Requires DWARF
85 debugging info.
84 - weight: Event specific weight, e.g. memory latency or transaction 86 - weight: Event specific weight, e.g. memory latency or transaction
85 abort cost. This is the global weight. 87 abort cost. This is the global weight.
86 - local_weight: Local weight version of the weight above. 88 - local_weight: Local weight version of the weight above.
@@ -109,6 +111,7 @@ OPTIONS
109 - mispredict: "N" for predicted branch, "Y" for mispredicted branch 111 - mispredict: "N" for predicted branch, "Y" for mispredicted branch
110 - in_tx: branch in TSX transaction 112 - in_tx: branch in TSX transaction
111 - abort: TSX transaction abort. 113 - abort: TSX transaction abort.
114 - cycles: Cycles in basic block
112 115
113 And default sort keys are changed to comm, dso_from, symbol_from, dso_to 116 And default sort keys are changed to comm, dso_from, symbol_from, dso_to
114 and symbol_to, see '--branch-stack'. 117 and symbol_to, see '--branch-stack'.
@@ -328,31 +331,23 @@ OPTIONS
328--itrace:: 331--itrace::
329 Options for decoding instruction tracing data. The options are: 332 Options for decoding instruction tracing data. The options are:
330 333
331 i synthesize instructions events 334include::itrace.txt[]
332 b synthesize branches events
333 c synthesize branches events (calls only)
334 r synthesize branches events (returns only)
335 x synthesize transactions events
336 e synthesize error events
337 d create a debug log
338 g synthesize a call chain (use with i or x)
339
340 The default is all events i.e. the same as --itrace=ibxe
341
342 In addition, the period (default 100000) for instructions events
343 can be specified in units of:
344
345 i instructions
346 t ticks
347 ms milliseconds
348 us microseconds
349 ns nanoseconds (default)
350
351 Also the call chain size (default 16, max. 1024) for instructions or
352 transactions events can be specified.
353 335
354 To disable decoding entirely, use --no-itrace. 336 To disable decoding entirely, use --no-itrace.
355 337
338--full-source-path::
339 Show the full path for source files for srcline output.
340
341--show-ref-call-graph::
342 When multiple events are sampled, it may not be necessary to collect
343 callgraphs for all of them. The sample sites are usually nearby,
344 and it's enough to collect the callgraphs on a reference event.
345 So the user can use the "call-graph=no" event modifier to disable
346 callgraphs for the other events and reduce the overhead.
347 However, perf report cannot normally show callgraphs for events
348 that have callgraphs disabled.
349 This option extends perf report to show the reference callgraphs,
350 collected by the reference event, for events without callgraphs.
356 351
357include::callchain-overhead-calculation.txt[] 352include::callchain-overhead-calculation.txt[]
358 353
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index c82df572fac2..614b2c7b0293 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -222,6 +222,17 @@ OPTIONS
222--show-mmap-events 222--show-mmap-events
223 Display mmap related events (e.g. MMAP, MMAP2). 223 Display mmap related events (e.g. MMAP, MMAP2).
224 224
225--show-switch-events
226 Display context switch events i.e. events of type PERF_RECORD_SWITCH or
227 PERF_RECORD_SWITCH_CPU_WIDE.
228
229--demangle::
230 Demangle symbol names to human readable form. It's enabled by default,
231 disable with --no-demangle.
232
233--demangle-kernel::
234 Demangle kernel symbol names to human readable form (for C++ kernels).
235
225--header 236--header
226 Show perf.data header. 237 Show perf.data header.
227 238
@@ -231,31 +242,13 @@ OPTIONS
231--itrace:: 242--itrace::
232 Options for decoding instruction tracing data. The options are: 243 Options for decoding instruction tracing data. The options are:
233 244
234 i synthesize instructions events 245include::itrace.txt[]
235 b synthesize branches events
236 c synthesize branches events (calls only)
237 r synthesize branches events (returns only)
238 x synthesize transactions events
239 e synthesize error events
240 d create a debug log
241 g synthesize a call chain (use with i or x)
242
243 The default is all events i.e. the same as --itrace=ibxe
244
245 In addition, the period (default 100000) for instructions events
246 can be specified in units of:
247
248 i instructions
249 t ticks
250 ms milliseconds
251 us microseconds
252 ns nanoseconds (default)
253
254 Also the call chain size (default 16, max. 1024) for instructions or
255 transactions events can be specified.
256 246
257 To disable decoding entirely, use --no-itrace. 247 To disable decoding entirely, use --no-itrace.
258 248
249--full-source-path::
250 Show the full path for source files for srcline output.
251
259SEE ALSO 252SEE ALSO
260-------- 253--------
261linkperf:perf-record[1], linkperf:perf-script-perl[1], 254linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 776aec4d0927..f6a23eb294e7 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -208,6 +208,27 @@ Default is to monitor all CPUS.
208 This option sets the time out limit. The default value is 500 ms. 208 This option sets the time out limit. The default value is 500 ms.
209 209
210 210
211-b::
212--branch-any::
213 Enable taken branch stack sampling. Any type of taken branch may be sampled.
214 This is a shortcut for --branch-filter any. See --branch-filter for more info.
215
216-j::
217--branch-filter::
218 Enable taken branch stack sampling. Each sample captures a series of consecutive
219 taken branches. The number of branches captured with each sample depends on the
220 underlying hardware, the type of branches of interest, and the executed code.
221 It is possible to select the types of branches captured by enabling filters.
222 For a full list of modifiers please see the perf record manpage.
223
224 The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
225 The privilege levels may be omitted, in which case the privilege levels of the associated
226 event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
227 levels are subject to permissions. When sampling on multiple events, branch stack sampling
228 is enabled for all the sampling events. The sampled branch type is the same for all events.
229 The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
230 Note that this feature may not be available on all processors.
231
211INTERACTIVE PROMPTING KEYS 232INTERACTIVE PROMPTING KEYS
212-------------------------- 233--------------------------
213 234
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index d01a0aad5a01..af009bd6e6b7 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -18,6 +18,7 @@ tools/arch/x86/include/asm/atomic.h
18tools/arch/x86/include/asm/rmwcc.h 18tools/arch/x86/include/asm/rmwcc.h
19tools/lib/traceevent 19tools/lib/traceevent
20tools/lib/api 20tools/lib/api
21tools/lib/bpf
21tools/lib/hweight.c 22tools/lib/hweight.c
22tools/lib/rbtree.c 23tools/lib/rbtree.c
23tools/lib/symbol/kallsyms.c 24tools/lib/symbol/kallsyms.c
@@ -40,7 +41,6 @@ tools/include/asm-generic/bitops.h
40tools/include/linux/atomic.h 41tools/include/linux/atomic.h
41tools/include/linux/bitops.h 42tools/include/linux/bitops.h
42tools/include/linux/compiler.h 43tools/include/linux/compiler.h
43tools/include/linux/export.h
44tools/include/linux/hash.h 44tools/include/linux/hash.h
45tools/include/linux/kernel.h 45tools/include/linux/kernel.h
46tools/include/linux/list.h 46tools/include/linux/list.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index bba34636b733..d9863cb96f59 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -76,6 +76,12 @@ include config/utilities.mak
76# 76#
77# Define NO_AUXTRACE if you do not want AUX area tracing support 77# Define NO_AUXTRACE if you do not want AUX area tracing support
78 78
79# As per kernel Makefile, avoid funny character set dependencies
80unexport LC_ALL
81LC_COLLATE=C
82LC_NUMERIC=C
83export LC_COLLATE LC_NUMERIC
84
79ifeq ($(srctree),) 85ifeq ($(srctree),)
80srctree := $(patsubst %/,%,$(dir $(shell pwd))) 86srctree := $(patsubst %/,%,$(dir $(shell pwd)))
81srctree := $(patsubst %/,%,$(dir $(srctree))) 87srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -135,6 +141,7 @@ INSTALL = install
135FLEX = flex 141FLEX = flex
136BISON = bison 142BISON = bison
137STRIP = strip 143STRIP = strip
144AWK = awk
138 145
139LIB_DIR = $(srctree)/tools/lib/api/ 146LIB_DIR = $(srctree)/tools/lib/api/
140TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ 147TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
@@ -289,7 +296,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf
289 296
290PERF_IN := $(OUTPUT)perf-in.o 297PERF_IN := $(OUTPUT)perf-in.o
291 298
292export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX 299export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK
293build := -f $(srctree)/tools/build/Makefile.build dir=. obj 300build := -f $(srctree)/tools/build/Makefile.build dir=. obj
294 301
295$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE 302$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
@@ -507,6 +514,11 @@ endif
507 $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 514 $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
508 $(call QUIET_INSTALL, perf-with-kcore) \ 515 $(call QUIET_INSTALL, perf-with-kcore) \
509 $(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 516 $(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
517ifndef NO_LIBAUDIT
518 $(call QUIET_INSTALL, strace/groups) \
519 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \
520 $(INSTALL) trace/strace/groups/* -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'
521endif
510ifndef NO_LIBPERL 522ifndef NO_LIBPERL
511 $(call QUIET_INSTALL, perl-scripts) \ 523 $(call QUIET_INSTALL, perl-scripts) \
512 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \ 524 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
@@ -560,7 +572,8 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean
560 $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete 572 $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
561 $(Q)$(RM) $(OUTPUT).config-detected 573 $(Q)$(RM) $(OUTPUT).config-detected
562 $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 574 $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
563 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* 575 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
576 $(OUTPUT)util/intel-pt-decoder/inat-tables.c
564 $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean 577 $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
565 $(python-clean) 578 $(python-clean)
566 579
diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/alpha/Build
@@ -0,0 +1 @@
# empty
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index b7bb42c44694..b00dfd92ea73 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -128,7 +128,7 @@ static const char *normalize_arch(char *arch)
128 return arch; 128 return arch;
129} 129}
130 130
131static int perf_session_env__lookup_binutils_path(struct perf_session_env *env, 131static int perf_session_env__lookup_binutils_path(struct perf_env *env,
132 const char *name, 132 const char *name,
133 const char **path) 133 const char **path)
134{ 134{
@@ -206,7 +206,7 @@ out_error:
206 return -1; 206 return -1;
207} 207}
208 208
209int perf_session_env__lookup_objdump(struct perf_session_env *env) 209int perf_session_env__lookup_objdump(struct perf_env *env)
210{ 210{
211 /* 211 /*
212 * For live mode, env->arch will be NULL and we can use 212 * For live mode, env->arch will be NULL and we can use
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index ede246eda9be..20176df69fc8 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,6 +5,6 @@
5 5
6extern const char *objdump_path; 6extern const char *objdump_path;
7 7
8int perf_session_env__lookup_objdump(struct perf_session_env *env); 8int perf_session_env__lookup_objdump(struct perf_env *env);
9 9
10#endif /* ARCH_PERF_COMMON_H */ 10#endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/mips/Build
@@ -0,0 +1 @@
# empty
diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/parisc/Build
@@ -0,0 +1 @@
# empty
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index cfbccc4e3187..2c55e1b336c5 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -1,8 +1,13 @@
 libperf-y += header.o
 libperf-y += tsc.o
+libperf-y += pmu.o
 libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 
 libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
 libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o
+libperf-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
new file mode 100644
index 000000000000..7a7805583e3f
--- /dev/null
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -0,0 +1,83 @@
1/*
2 * auxtrace.c: AUX area tracing support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <stdbool.h>
17
18#include "../../util/header.h"
19#include "../../util/debug.h"
20#include "../../util/pmu.h"
21#include "../../util/auxtrace.h"
22#include "../../util/intel-pt.h"
23#include "../../util/intel-bts.h"
24#include "../../util/evlist.h"
25
26static
27struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
28 int *err)
29{
30 struct perf_pmu *intel_pt_pmu;
31 struct perf_pmu *intel_bts_pmu;
32 struct perf_evsel *evsel;
33 bool found_pt = false;
34 bool found_bts = false;
35
36 intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
37 intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
38
39 if (evlist) {
40 evlist__for_each(evlist, evsel) {
41 if (intel_pt_pmu &&
42 evsel->attr.type == intel_pt_pmu->type)
43 found_pt = true;
44 if (intel_bts_pmu &&
45 evsel->attr.type == intel_bts_pmu->type)
46 found_bts = true;
47 }
48 }
49
50 if (found_pt && found_bts) {
51 pr_err("intel_pt and intel_bts may not be used together\n");
52 *err = -EINVAL;
53 return NULL;
54 }
55
56 if (found_pt)
57 return intel_pt_recording_init(err);
58
59 if (found_bts)
60 return intel_bts_recording_init(err);
61
62 return NULL;
63}
64
65struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
66 int *err)
67{
68 char buffer[64];
69 int ret;
70
71 *err = 0;
72
73 ret = get_cpuid(buffer, sizeof(buffer));
74 if (ret) {
75 *err = ret;
76 return NULL;
77 }
78
79 if (!strncmp(buffer, "GenuineIntel,", 13))
80 return auxtrace_record__init_intel(evlist, err);
81
82 return NULL;
83}
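
auxtrace_record__init() above dispatches on the CPUID vendor string, then on which AUX-area PMU the event list actually selected, and finally hands off to intel_pt_recording_init() or intel_bts_recording_init(). The PMUs that perf_pmu__find() resolves are the ones the kernel registers under sysfs; as a rough standalone illustration — a sketch, not perf code, where only the /sys/bus/event_source/devices layout is kernel ABI and the helper name is made up — the same presence check can be done by reading the PMU's type number:

#include <stdio.h>

/* Hypothetical probe for an AUX-area PMU such as "intel_pt" or "intel_bts". */
static int pmu_type(const char *pmu, int *type)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/bus/event_source/devices/%s/type", pmu);
	f = fopen(path, "r");
	if (!f)
		return -1;	/* PMU not registered on this system */
	if (fscanf(f, "%d", type) != 1)
		*type = -1;
	fclose(f);
	return *type < 0 ? -1 : 0;
}

int main(void)
{
	int type;

	if (!pmu_type("intel_pt", &type))
		printf("intel_pt PMU type %d\n", type);
	else
		puts("intel_pt PMU not available");
	return 0;
}

The type number read here is what the code above compares against evsel->attr.type to decide whether an Intel PT or BTS event was requested.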
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
new file mode 100644
index 000000000000..9b94ce520917
--- /dev/null
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -0,0 +1,458 @@
1/*
2 * intel-bts.c: Intel Branch Trace Store (BTS) support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <linux/kernel.h>
17#include <linux/types.h>
18#include <linux/bitops.h>
19#include <linux/log2.h>
20
21#include "../../util/cpumap.h"
22#include "../../util/evsel.h"
23#include "../../util/evlist.h"
24#include "../../util/session.h"
25#include "../../util/util.h"
26#include "../../util/pmu.h"
27#include "../../util/debug.h"
28#include "../../util/tsc.h"
29#include "../../util/auxtrace.h"
30#include "../../util/intel-bts.h"
31
32#define KiB(x) ((x) * 1024)
33#define MiB(x) ((x) * 1024 * 1024)
34#define KiB_MASK(x) (KiB(x) - 1)
35#define MiB_MASK(x) (MiB(x) - 1)
36
37#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4)
38
39#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60)
40
41struct intel_bts_snapshot_ref {
42 void *ref_buf;
43 size_t ref_offset;
44 bool wrapped;
45};
46
47struct intel_bts_recording {
48 struct auxtrace_record itr;
49 struct perf_pmu *intel_bts_pmu;
50 struct perf_evlist *evlist;
51 bool snapshot_mode;
52 size_t snapshot_size;
53 int snapshot_ref_cnt;
54 struct intel_bts_snapshot_ref *snapshot_refs;
55};
56
57struct branch {
58 u64 from;
59 u64 to;
60 u64 misc;
61};
62
63static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused)
64{
65 return INTEL_BTS_AUXTRACE_PRIV_SIZE;
66}
67
68static int intel_bts_info_fill(struct auxtrace_record *itr,
69 struct perf_session *session,
70 struct auxtrace_info_event *auxtrace_info,
71 size_t priv_size)
72{
73 struct intel_bts_recording *btsr =
74 container_of(itr, struct intel_bts_recording, itr);
75 struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
76 struct perf_event_mmap_page *pc;
77 struct perf_tsc_conversion tc = { .time_mult = 0, };
78 bool cap_user_time_zero = false;
79 int err;
80
81 if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
82 return -EINVAL;
83
84 if (!session->evlist->nr_mmaps)
85 return -EINVAL;
86
87 pc = session->evlist->mmap[0].base;
88 if (pc) {
89 err = perf_read_tsc_conversion(pc, &tc);
90 if (err) {
91 if (err != -EOPNOTSUPP)
92 return err;
93 } else {
94 cap_user_time_zero = tc.time_mult != 0;
95 }
96 if (!cap_user_time_zero)
97 ui__warning("Intel BTS: TSC not available\n");
98 }
99
100 auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS;
101 auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type;
102 auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift;
103 auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult;
104 auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero;
105 auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero;
106 auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode;
107
108 return 0;
109}
110
111static int intel_bts_recording_options(struct auxtrace_record *itr,
112 struct perf_evlist *evlist,
113 struct record_opts *opts)
114{
115 struct intel_bts_recording *btsr =
116 container_of(itr, struct intel_bts_recording, itr);
117 struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
118 struct perf_evsel *evsel, *intel_bts_evsel = NULL;
119 const struct cpu_map *cpus = evlist->cpus;
120 bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
121
122 btsr->evlist = evlist;
123 btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
124
125 evlist__for_each(evlist, evsel) {
126 if (evsel->attr.type == intel_bts_pmu->type) {
127 if (intel_bts_evsel) {
128 pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n");
129 return -EINVAL;
130 }
131 evsel->attr.freq = 0;
132 evsel->attr.sample_period = 1;
133 intel_bts_evsel = evsel;
134 opts->full_auxtrace = true;
135 }
136 }
137
138 if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
139 pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n");
140 return -EINVAL;
141 }
142
143 if (!opts->full_auxtrace)
144 return 0;
145
146 if (opts->full_auxtrace && !cpu_map__empty(cpus)) {
147 pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
148 return -EINVAL;
149 }
150
151 /* Set default sizes for snapshot mode */
152 if (opts->auxtrace_snapshot_mode) {
153 if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
154 if (privileged) {
155 opts->auxtrace_mmap_pages = MiB(4) / page_size;
156 } else {
157 opts->auxtrace_mmap_pages = KiB(128) / page_size;
158 if (opts->mmap_pages == UINT_MAX)
159 opts->mmap_pages = KiB(256) / page_size;
160 }
161 } else if (!opts->auxtrace_mmap_pages && !privileged &&
162 opts->mmap_pages == UINT_MAX) {
163 opts->mmap_pages = KiB(256) / page_size;
164 }
165 if (!opts->auxtrace_snapshot_size)
166 opts->auxtrace_snapshot_size =
167 opts->auxtrace_mmap_pages * (size_t)page_size;
168 if (!opts->auxtrace_mmap_pages) {
169 size_t sz = opts->auxtrace_snapshot_size;
170
171 sz = round_up(sz, page_size) / page_size;
172 opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
173 }
174 if (opts->auxtrace_snapshot_size >
175 opts->auxtrace_mmap_pages * (size_t)page_size) {
176 pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
177 opts->auxtrace_snapshot_size,
178 opts->auxtrace_mmap_pages * (size_t)page_size);
179 return -EINVAL;
180 }
181 if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
182 pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
183 return -EINVAL;
184 }
185 pr_debug2("Intel BTS snapshot size: %zu\n",
186 opts->auxtrace_snapshot_size);
187 }
188
189 /* Set default sizes for full trace mode */
190 if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
191 if (privileged) {
192 opts->auxtrace_mmap_pages = MiB(4) / page_size;
193 } else {
194 opts->auxtrace_mmap_pages = KiB(128) / page_size;
195 if (opts->mmap_pages == UINT_MAX)
196 opts->mmap_pages = KiB(256) / page_size;
197 }
198 }
199
200 /* Validate auxtrace_mmap_pages */
201 if (opts->auxtrace_mmap_pages) {
202 size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
203 size_t min_sz;
204
205 if (opts->auxtrace_snapshot_mode)
206 min_sz = KiB(4);
207 else
208 min_sz = KiB(8);
209
210 if (sz < min_sz || !is_power_of_2(sz)) {
211 pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n",
212 min_sz / 1024);
213 return -EINVAL;
214 }
215 }
216
217 if (intel_bts_evsel) {
218 /*
219 * To obtain the auxtrace buffer file descriptor, the auxtrace event
220 * must come first.
221 */
222 perf_evlist__to_front(evlist, intel_bts_evsel);
223 /*
224 * In the case of per-cpu mmaps, we need the CPU on the
225 * AUX event.
226 */
227 if (!cpu_map__empty(cpus))
228 perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
229 }
230
231 /* Add dummy event to keep tracking */
232 if (opts->full_auxtrace) {
233 struct perf_evsel *tracking_evsel;
234 int err;
235
236 err = parse_events(evlist, "dummy:u", NULL);
237 if (err)
238 return err;
239
240 tracking_evsel = perf_evlist__last(evlist);
241
242 perf_evlist__set_tracking_event(evlist, tracking_evsel);
243
244 tracking_evsel->attr.freq = 0;
245 tracking_evsel->attr.sample_period = 1;
246 }
247
248 return 0;
249}
250
251static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr,
252 struct record_opts *opts,
253 const char *str)
254{
255 struct intel_bts_recording *btsr =
256 container_of(itr, struct intel_bts_recording, itr);
257 unsigned long long snapshot_size = 0;
258 char *endptr;
259
260 if (str) {
261 snapshot_size = strtoull(str, &endptr, 0);
262 if (*endptr || snapshot_size > SIZE_MAX)
263 return -1;
264 }
265
266 opts->auxtrace_snapshot_mode = true;
267 opts->auxtrace_snapshot_size = snapshot_size;
268
269 btsr->snapshot_size = snapshot_size;
270
271 return 0;
272}
273
274static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused)
275{
276 return rdtsc();
277}
278
279static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr,
280 int idx)
281{
282 const size_t sz = sizeof(struct intel_bts_snapshot_ref);
283 int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2;
284 struct intel_bts_snapshot_ref *refs;
285
286 if (!new_cnt)
287 new_cnt = 16;
288
289 while (new_cnt <= idx)
290 new_cnt *= 2;
291
292 refs = calloc(new_cnt, sz);
293 if (!refs)
294 return -ENOMEM;
295
296 memcpy(refs, btsr->snapshot_refs, cnt * sz);
297
298 btsr->snapshot_refs = refs;
299 btsr->snapshot_ref_cnt = new_cnt;
300
301 return 0;
302}
303
304static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr)
305{
306 int i;
307
308 for (i = 0; i < btsr->snapshot_ref_cnt; i++)
309 zfree(&btsr->snapshot_refs[i].ref_buf);
310 zfree(&btsr->snapshot_refs);
311}
312
313static void intel_bts_recording_free(struct auxtrace_record *itr)
314{
315 struct intel_bts_recording *btsr =
316 container_of(itr, struct intel_bts_recording, itr);
317
318 intel_bts_free_snapshot_refs(btsr);
319 free(btsr);
320}
321
322static int intel_bts_snapshot_start(struct auxtrace_record *itr)
323{
324 struct intel_bts_recording *btsr =
325 container_of(itr, struct intel_bts_recording, itr);
326 struct perf_evsel *evsel;
327
328 evlist__for_each(btsr->evlist, evsel) {
329 if (evsel->attr.type == btsr->intel_bts_pmu->type)
330 return perf_evlist__disable_event(btsr->evlist, evsel);
331 }
332 return -EINVAL;
333}
334
335static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
336{
337 struct intel_bts_recording *btsr =
338 container_of(itr, struct intel_bts_recording, itr);
339 struct perf_evsel *evsel;
340
341 evlist__for_each(btsr->evlist, evsel) {
342 if (evsel->attr.type == btsr->intel_bts_pmu->type)
343 return perf_evlist__enable_event(btsr->evlist, evsel);
344 }
345 return -EINVAL;
346}
347
348static bool intel_bts_first_wrap(u64 *data, size_t buf_size)
349{
350 int i, a, b;
351
352 b = buf_size >> 3;
353 a = b - 512;
354 if (a < 0)
355 a = 0;
356
357 for (i = a; i < b; i++) {
358 if (data[i])
359 return true;
360 }
361
362 return false;
363}
364
365static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx,
366 struct auxtrace_mmap *mm, unsigned char *data,
367 u64 *head, u64 *old)
368{
369 struct intel_bts_recording *btsr =
370 container_of(itr, struct intel_bts_recording, itr);
371 bool wrapped;
372 int err;
373
374 pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
375 __func__, idx, (size_t)*old, (size_t)*head);
376
377 if (idx >= btsr->snapshot_ref_cnt) {
378 err = intel_bts_alloc_snapshot_refs(btsr, idx);
379 if (err)
380 goto out_err;
381 }
382
383 wrapped = btsr->snapshot_refs[idx].wrapped;
384 if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) {
385 btsr->snapshot_refs[idx].wrapped = true;
386 wrapped = true;
387 }
388
389 /*
390 * In full trace mode 'head' continually increases. However in snapshot
391 * mode 'head' is an offset within the buffer. Here 'old' and 'head'
392 * are adjusted to match the full trace case which expects that 'old' is
393 * always less than 'head'.
394 */
395 if (wrapped) {
396 *old = *head;
397 *head += mm->len;
398 } else {
399 if (mm->mask)
400 *old &= mm->mask;
401 else
402 *old %= mm->len;
403 if (*old > *head)
404 *head += mm->len;
405 }
406
407 pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
408 __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
409
410 return 0;
411
412out_err:
413 pr_err("%s: failed, error %d\n", __func__, err);
414 return err;
415}
416
417static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
418{
419 struct intel_bts_recording *btsr =
420 container_of(itr, struct intel_bts_recording, itr);
421 struct perf_evsel *evsel;
422
423 evlist__for_each(btsr->evlist, evsel) {
424 if (evsel->attr.type == btsr->intel_bts_pmu->type)
425 return perf_evlist__enable_event_idx(btsr->evlist,
426 evsel, idx);
427 }
428 return -EINVAL;
429}
430
431struct auxtrace_record *intel_bts_recording_init(int *err)
432{
433 struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
434 struct intel_bts_recording *btsr;
435
436 if (!intel_bts_pmu)
437 return NULL;
438
439 btsr = zalloc(sizeof(struct intel_bts_recording));
440 if (!btsr) {
441 *err = -ENOMEM;
442 return NULL;
443 }
444
445 btsr->intel_bts_pmu = intel_bts_pmu;
446 btsr->itr.recording_options = intel_bts_recording_options;
447 btsr->itr.info_priv_size = intel_bts_info_priv_size;
448 btsr->itr.info_fill = intel_bts_info_fill;
449 btsr->itr.free = intel_bts_recording_free;
450 btsr->itr.snapshot_start = intel_bts_snapshot_start;
451 btsr->itr.snapshot_finish = intel_bts_snapshot_finish;
452 btsr->itr.find_snapshot = intel_bts_find_snapshot;
453 btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
454 btsr->itr.reference = intel_bts_reference;
455 btsr->itr.read_finish = intel_bts_read_finish;
456 btsr->itr.alignment = sizeof(struct branch);
457 return &btsr->itr;
458}
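
A note on the sizing logic in intel_bts_recording_options() above: when only a snapshot size is supplied, auxtrace_mmap_pages is derived by rounding the size up to whole pages and then up to a power of two, because the AUX area must span a power-of-2 number of pages (hence the is_power_of_2() check in the validation step). A minimal sketch of that arithmetic, with round_up() and roundup_pow_of_two() re-implemented locally (in perf they come from the tools/include kernel headers) and a 4 KiB page size assumed for illustration:

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL	/* assumption; perf reads the real value at runtime */

/* Local stand-ins for the kernel helpers used above. */
static size_t round_up(size_t x, size_t align)
{
	return (x + align - 1) & ~(align - 1);
}

static size_t roundup_pow_of_two(size_t x)
{
	size_t r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	size_t snapshot_size = 60 * 1024;	/* e.g. a 60 KiB snapshot request */
	size_t pages = round_up(snapshot_size, PAGE_SIZE) / PAGE_SIZE;

	/* 60 KiB -> 15 pages -> 16 pages (64 KiB) of AUX buffer */
	printf("auxtrace_mmap_pages = %zu\n", roundup_pow_of_two(pages));
	return 0;
}

So a requested 60 KiB snapshot ends up backed by a 64 KiB (16-page) AUX mmap, and the later check of the snapshot size against auxtrace_mmap_pages * page_size passes.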
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
new file mode 100644
index 000000000000..2ca10d796c0b
--- /dev/null
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -0,0 +1,1007 @@
1/*
2 * intel-pt.c: Intel Processor Trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <stdbool.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/bitops.h>
20#include <linux/log2.h>
21#include <cpuid.h>
22
23#include "../../perf.h"
24#include "../../util/session.h"
25#include "../../util/event.h"
26#include "../../util/evlist.h"
27#include "../../util/evsel.h"
28#include "../../util/cpumap.h"
29#include "../../util/parse-options.h"
30#include "../../util/parse-events.h"
31#include "../../util/pmu.h"
32#include "../../util/debug.h"
33#include "../../util/auxtrace.h"
34#include "../../util/tsc.h"
35#include "../../util/intel-pt.h"
36
37#define KiB(x) ((x) * 1024)
38#define MiB(x) ((x) * 1024 * 1024)
39#define KiB_MASK(x) (KiB(x) - 1)
40#define MiB_MASK(x) (MiB(x) - 1)
41
42#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4)
43
44#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60)
45
46#define INTEL_PT_PSB_PERIOD_NEAR 256
47
48struct intel_pt_snapshot_ref {
49 void *ref_buf;
50 size_t ref_offset;
51 bool wrapped;
52};
53
54struct intel_pt_recording {
55 struct auxtrace_record itr;
56 struct perf_pmu *intel_pt_pmu;
57 int have_sched_switch;
58 struct perf_evlist *evlist;
59 bool snapshot_mode;
60 bool snapshot_init_done;
61 size_t snapshot_size;
62 size_t snapshot_ref_buf_size;
63 int snapshot_ref_cnt;
64 struct intel_pt_snapshot_ref *snapshot_refs;
65};
66
67static int intel_pt_parse_terms_with_default(struct list_head *formats,
68 const char *str,
69 u64 *config)
70{
71 struct list_head *terms;
72 struct perf_event_attr attr = { .size = 0, };
73 int err;
74
75 terms = malloc(sizeof(struct list_head));
76 if (!terms)
77 return -ENOMEM;
78
79 INIT_LIST_HEAD(terms);
80
81 err = parse_events_terms(terms, str);
82 if (err)
83 goto out_free;
84
85 attr.config = *config;
86 err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
87 if (err)
88 goto out_free;
89
90 *config = attr.config;
91out_free:
92 parse_events__free_terms(terms);
93 return err;
94}
95
96static int intel_pt_parse_terms(struct list_head *formats, const char *str,
97 u64 *config)
98{
99 *config = 0;
100 return intel_pt_parse_terms_with_default(formats, str, config);
101}
102
103static u64 intel_pt_masked_bits(u64 mask, u64 bits)
104{
105 const u64 top_bit = 1ULL << 63;
106 u64 res = 0;
107 int i;
108
109 for (i = 0; i < 64; i++) {
110 if (mask & top_bit) {
111 res <<= 1;
112 if (bits & top_bit)
113 res |= 1;
114 }
115 mask <<= 1;
116 bits <<= 1;
117 }
118
119 return res;
120}
121
122static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
123 struct perf_evlist *evlist, u64 *res)
124{
125 struct perf_evsel *evsel;
126 u64 mask;
127
128 *res = 0;
129
130 mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
131 if (!mask)
132 return -EINVAL;
133
134 evlist__for_each(evlist, evsel) {
135 if (evsel->attr.type == intel_pt_pmu->type) {
136 *res = intel_pt_masked_bits(mask, evsel->attr.config);
137 return 0;
138 }
139 }
140
141 return -EINVAL;
142}
143
144static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
145 struct perf_evlist *evlist)
146{
147 u64 val;
148 int err, topa_multiple_entries;
149 size_t psb_period;
150
151 if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
152 "%d", &topa_multiple_entries) != 1)
153 topa_multiple_entries = 0;
154
155 /*
156 * Use caps/topa_multiple_entries to indicate early hardware that had
157 * extra frequent PSBs.
158 */
159 if (!topa_multiple_entries) {
160 psb_period = 256;
161 goto out;
162 }
163
164 err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
165 if (err)
166 val = 0;
167
168 psb_period = 1 << (val + 11);
169out:
170 pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
171 return psb_period;
172}
173
174static int intel_pt_pick_bit(int bits, int target)
175{
176 int pos, pick = -1;
177
178 for (pos = 0; bits; bits >>= 1, pos++) {
179 if (bits & 1) {
180 if (pos <= target || pick < 0)
181 pick = pos;
182 if (pos >= target)
183 break;
184 }
185 }
186
187 return pick;
188}
189
190static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
191{
192 char buf[256];
193 int mtc, mtc_periods = 0, mtc_period;
194 int psb_cyc, psb_periods, psb_period;
195 int pos = 0;
196 u64 config;
197
198 pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
199
200 if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
201 &mtc) != 1)
202 mtc = 1;
203
204 if (mtc) {
205 if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
206 &mtc_periods) != 1)
207 mtc_periods = 0;
208 if (mtc_periods) {
209 mtc_period = intel_pt_pick_bit(mtc_periods, 3);
210 pos += scnprintf(buf + pos, sizeof(buf) - pos,
211 ",mtc,mtc_period=%d", mtc_period);
212 }
213 }
214
215 if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
216 &psb_cyc) != 1)
217 psb_cyc = 1;
218
219 if (psb_cyc && mtc_periods) {
220 if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
221 &psb_periods) != 1)
222 psb_periods = 0;
223 if (psb_periods) {
224 psb_period = intel_pt_pick_bit(psb_periods, 3);
225 pos += scnprintf(buf + pos, sizeof(buf) - pos,
226 ",psb_period=%d", psb_period);
227 }
228 }
229
230 pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
231
232 intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
233
234 return config;
235}
236
237static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
238 struct record_opts *opts,
239 const char *str)
240{
241 struct intel_pt_recording *ptr =
242 container_of(itr, struct intel_pt_recording, itr);
243 unsigned long long snapshot_size = 0;
244 char *endptr;
245
246 if (str) {
247 snapshot_size = strtoull(str, &endptr, 0);
248 if (*endptr || snapshot_size > SIZE_MAX)
249 return -1;
250 }
251
252 opts->auxtrace_snapshot_mode = true;
253 opts->auxtrace_snapshot_size = snapshot_size;
254
255 ptr->snapshot_size = snapshot_size;
256
257 return 0;
258}
259
260struct perf_event_attr *
261intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
262{
263 struct perf_event_attr *attr;
264
265 attr = zalloc(sizeof(struct perf_event_attr));
266 if (!attr)
267 return NULL;
268
269 attr->config = intel_pt_default_config(intel_pt_pmu);
270
271 intel_pt_pmu->selectable = true;
272
273 return attr;
274}
275
276static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused)
277{
278 return INTEL_PT_AUXTRACE_PRIV_SIZE;
279}
280
281static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
282{
283 unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
284
285 __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
286 *n = ebx;
287 *d = eax;
288}
289
290static int intel_pt_info_fill(struct auxtrace_record *itr,
291 struct perf_session *session,
292 struct auxtrace_info_event *auxtrace_info,
293 size_t priv_size)
294{
295 struct intel_pt_recording *ptr =
296 container_of(itr, struct intel_pt_recording, itr);
297 struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
298 struct perf_event_mmap_page *pc;
299 struct perf_tsc_conversion tc = { .time_mult = 0, };
300 bool cap_user_time_zero = false, per_cpu_mmaps;
301 u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
302 u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
303 int err;
304
305 if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE)
306 return -EINVAL;
307
308 intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
309 intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
310 &noretcomp_bit);
311 intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
312 mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
313 "mtc_period");
314 intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);
315
316 intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
317
318 if (!session->evlist->nr_mmaps)
319 return -EINVAL;
320
321 pc = session->evlist->mmap[0].base;
322 if (pc) {
323 err = perf_read_tsc_conversion(pc, &tc);
324 if (err) {
325 if (err != -EOPNOTSUPP)
326 return err;
327 } else {
328 cap_user_time_zero = tc.time_mult != 0;
329 }
330 if (!cap_user_time_zero)
331 ui__warning("Intel Processor Trace: TSC not available\n");
332 }
333
334 per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus);
335
336 auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
337 auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
338 auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
339 auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
340 auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
341 auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
342 auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
343 auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
344 auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
345 auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
346 auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
347 auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
348 auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
349 auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
350 auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
351 auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
352
353 return 0;
354}
355
356static int intel_pt_track_switches(struct perf_evlist *evlist)
357{
358 const char *sched_switch = "sched:sched_switch";
359 struct perf_evsel *evsel;
360 int err;
361
362 if (!perf_evlist__can_select_event(evlist, sched_switch))
363 return -EPERM;
364
365 err = parse_events(evlist, sched_switch, NULL);
366 if (err) {
367 pr_debug2("%s: failed to parse %s, error %d\n",
368 __func__, sched_switch, err);
369 return err;
370 }
371
372 evsel = perf_evlist__last(evlist);
373
374 perf_evsel__set_sample_bit(evsel, CPU);
375 perf_evsel__set_sample_bit(evsel, TIME);
376
377 evsel->system_wide = true;
378 evsel->no_aux_samples = true;
379 evsel->immediate = true;
380
381 return 0;
382}
383
384static void intel_pt_valid_str(char *str, size_t len, u64 valid)
385{
386 unsigned int val, last = 0, state = 1;
387 int p = 0;
388
389 str[0] = '\0';
390
391 for (val = 0; val <= 64; val++, valid >>= 1) {
392 if (valid & 1) {
393 last = val;
394 switch (state) {
395 case 0:
396 p += scnprintf(str + p, len - p, ",");
397 /* Fall through */
398 case 1:
399 p += scnprintf(str + p, len - p, "%u", val);
400 state = 2;
401 break;
402 case 2:
403 state = 3;
404 break;
405 case 3:
406 state = 4;
407 break;
408 default:
409 break;
410 }
411 } else {
412 switch (state) {
413 case 3:
414 p += scnprintf(str + p, len - p, ",%u", last);
415 state = 0;
416 break;
417 case 4:
418 p += scnprintf(str + p, len - p, "-%u", last);
419 state = 0;
420 break;
421 default:
422 break;
423 }
424 if (state != 1)
425 state = 0;
426 }
427 }
428}
429
430static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
431 const char *caps, const char *name,
432 const char *supported, u64 config)
433{
434 char valid_str[256];
435 unsigned int shift;
436 unsigned long long valid;
437 u64 bits;
438 int ok;
439
440 if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
441 valid = 0;
442
443 if (supported &&
444 perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
445 valid = 0;
446
447 valid |= 1;
448
449 bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);
450
451 config &= bits;
452
453 for (shift = 0; bits && !(bits & 1); shift++)
454 bits >>= 1;
455
456 config >>= shift;
457
458 if (config > 63)
459 goto out_err;
460
461 if (valid & (1 << config))
462 return 0;
463out_err:
464 intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
465 pr_err("Invalid %s for %s. Valid values are: %s\n",
466 name, INTEL_PT_PMU_NAME, valid_str);
467 return -EINVAL;
468}
469
470static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
471 struct perf_evsel *evsel)
472{
473 int err;
474
475 if (!evsel)
476 return 0;
477
478 err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
479 "cyc_thresh", "caps/psb_cyc",
480 evsel->attr.config);
481 if (err)
482 return err;
483
484 err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
485 "mtc_period", "caps/mtc",
486 evsel->attr.config);
487 if (err)
488 return err;
489
490 return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
491 "psb_period", "caps/psb_cyc",
492 evsel->attr.config);
493}
494
495static int intel_pt_recording_options(struct auxtrace_record *itr,
496 struct perf_evlist *evlist,
497 struct record_opts *opts)
498{
499 struct intel_pt_recording *ptr =
500 container_of(itr, struct intel_pt_recording, itr);
501 struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
502 bool have_timing_info;
503 struct perf_evsel *evsel, *intel_pt_evsel = NULL;
504 const struct cpu_map *cpus = evlist->cpus;
505 bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
506 u64 tsc_bit;
507 int err;
508
509 ptr->evlist = evlist;
510 ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
511
512 evlist__for_each(evlist, evsel) {
513 if (evsel->attr.type == intel_pt_pmu->type) {
514 if (intel_pt_evsel) {
515 pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
516 return -EINVAL;
517 }
518 evsel->attr.freq = 0;
519 evsel->attr.sample_period = 1;
520 intel_pt_evsel = evsel;
521 opts->full_auxtrace = true;
522 }
523 }
524
525 if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
526 pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
527 return -EINVAL;
528 }
529
530 if (opts->use_clockid) {
531 pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
532 return -EINVAL;
533 }
534
535 if (!opts->full_auxtrace)
536 return 0;
537
538 err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
539 if (err)
540 return err;
541
542 /* Set default sizes for snapshot mode */
543 if (opts->auxtrace_snapshot_mode) {
544 size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
545
546 if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
547 if (privileged) {
548 opts->auxtrace_mmap_pages = MiB(4) / page_size;
549 } else {
550 opts->auxtrace_mmap_pages = KiB(128) / page_size;
551 if (opts->mmap_pages == UINT_MAX)
552 opts->mmap_pages = KiB(256) / page_size;
553 }
554 } else if (!opts->auxtrace_mmap_pages && !privileged &&
555 opts->mmap_pages == UINT_MAX) {
556 opts->mmap_pages = KiB(256) / page_size;
557 }
558 if (!opts->auxtrace_snapshot_size)
559 opts->auxtrace_snapshot_size =
560 opts->auxtrace_mmap_pages * (size_t)page_size;
561 if (!opts->auxtrace_mmap_pages) {
562 size_t sz = opts->auxtrace_snapshot_size;
563
564 sz = round_up(sz, page_size) / page_size;
565 opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
566 }
567 if (opts->auxtrace_snapshot_size >
568 opts->auxtrace_mmap_pages * (size_t)page_size) {
569 pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
570 opts->auxtrace_snapshot_size,
571 opts->auxtrace_mmap_pages * (size_t)page_size);
572 return -EINVAL;
573 }
574 if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
575 pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
576 return -EINVAL;
577 }
578 pr_debug2("Intel PT snapshot size: %zu\n",
579 opts->auxtrace_snapshot_size);
580 if (psb_period &&
581 opts->auxtrace_snapshot_size <= psb_period +
582 INTEL_PT_PSB_PERIOD_NEAR)
583 ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
584 opts->auxtrace_snapshot_size, psb_period);
585 }
586
587 /* Set default sizes for full trace mode */
588 if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
589 if (privileged) {
590 opts->auxtrace_mmap_pages = MiB(4) / page_size;
591 } else {
592 opts->auxtrace_mmap_pages = KiB(128) / page_size;
593 if (opts->mmap_pages == UINT_MAX)
594 opts->mmap_pages = KiB(256) / page_size;
595 }
596 }
597
598 /* Validate auxtrace_mmap_pages */
599 if (opts->auxtrace_mmap_pages) {
600 size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
601 size_t min_sz;
602
603 if (opts->auxtrace_snapshot_mode)
604 min_sz = KiB(4);
605 else
606 min_sz = KiB(8);
607
608 if (sz < min_sz || !is_power_of_2(sz)) {
609 pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
610 min_sz / 1024);
611 return -EINVAL;
612 }
613 }
614
615 intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
616
617 if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit))
618 have_timing_info = true;
619 else
620 have_timing_info = false;
621
622 /*
623 * Per-cpu recording needs sched_switch events to distinguish different
624 * threads.
625 */
626 if (have_timing_info && !cpu_map__empty(cpus)) {
627 err = intel_pt_track_switches(evlist);
628 if (err == -EPERM)
629 pr_debug2("Unable to select sched:sched_switch\n");
630 else if (err)
631 return err;
632 else
633 ptr->have_sched_switch = 1;
634 }
635
636 if (intel_pt_evsel) {
637 /*
638 * To obtain the auxtrace buffer file descriptor, the auxtrace
639 * event must come first.
640 */
641 perf_evlist__to_front(evlist, intel_pt_evsel);
642 /*
643 * In the case of per-cpu mmaps, we need the CPU on the
644 * AUX event.
645 */
646 if (!cpu_map__empty(cpus))
647 perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
648 }
649
650 /* Add dummy event to keep tracking */
651 if (opts->full_auxtrace) {
652 struct perf_evsel *tracking_evsel;
653
654 err = parse_events(evlist, "dummy:u", NULL);
655 if (err)
656 return err;
657
658 tracking_evsel = perf_evlist__last(evlist);
659
660 perf_evlist__set_tracking_event(evlist, tracking_evsel);
661
662 tracking_evsel->attr.freq = 0;
663 tracking_evsel->attr.sample_period = 1;
664
665 /* In per-cpu case, always need the time of mmap events etc */
666 if (!cpu_map__empty(cpus))
667 perf_evsel__set_sample_bit(tracking_evsel, TIME);
668 }
669
670 /*
671 * Warn the user when we do not have enough information to decode i.e.
672 * per-cpu with no sched_switch (except workload-only).
673 */
674 if (!ptr->have_sched_switch && !cpu_map__empty(cpus) &&
675 !target__none(&opts->target))
676 ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
677
678 return 0;
679}
680
681static int intel_pt_snapshot_start(struct auxtrace_record *itr)
682{
683 struct intel_pt_recording *ptr =
684 container_of(itr, struct intel_pt_recording, itr);
685 struct perf_evsel *evsel;
686
687 evlist__for_each(ptr->evlist, evsel) {
688 if (evsel->attr.type == ptr->intel_pt_pmu->type)
689 return perf_evlist__disable_event(ptr->evlist, evsel);
690 }
691 return -EINVAL;
692}
693
694static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
695{
696 struct intel_pt_recording *ptr =
697 container_of(itr, struct intel_pt_recording, itr);
698 struct perf_evsel *evsel;
699
700 evlist__for_each(ptr->evlist, evsel) {
701 if (evsel->attr.type == ptr->intel_pt_pmu->type)
702 return perf_evlist__enable_event(ptr->evlist, evsel);
703 }
704 return -EINVAL;
705}
706
707static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
708{
709 const size_t sz = sizeof(struct intel_pt_snapshot_ref);
710 int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
711 struct intel_pt_snapshot_ref *refs;
712
713 if (!new_cnt)
714 new_cnt = 16;
715
716 while (new_cnt <= idx)
717 new_cnt *= 2;
718
719 refs = calloc(new_cnt, sz);
720 if (!refs)
721 return -ENOMEM;
722
723 memcpy(refs, ptr->snapshot_refs, cnt * sz);
724
725 ptr->snapshot_refs = refs;
726 ptr->snapshot_ref_cnt = new_cnt;
727
728 return 0;
729}
730
731static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
732{
733 int i;
734
735 for (i = 0; i < ptr->snapshot_ref_cnt; i++)
736 zfree(&ptr->snapshot_refs[i].ref_buf);
737 zfree(&ptr->snapshot_refs);
738}
739
740static void intel_pt_recording_free(struct auxtrace_record *itr)
741{
742 struct intel_pt_recording *ptr =
743 container_of(itr, struct intel_pt_recording, itr);
744
745 intel_pt_free_snapshot_refs(ptr);
746 free(ptr);
747}
748
749static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
750 size_t snapshot_buf_size)
751{
752 size_t ref_buf_size = ptr->snapshot_ref_buf_size;
753 void *ref_buf;
754
755 ref_buf = zalloc(ref_buf_size);
756 if (!ref_buf)
757 return -ENOMEM;
758
759 ptr->snapshot_refs[idx].ref_buf = ref_buf;
760 ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;
761
762 return 0;
763}
764
765static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
766 size_t snapshot_buf_size)
767{
768 const size_t max_size = 256 * 1024;
769 size_t buf_size = 0, psb_period;
770
771 if (ptr->snapshot_size <= 64 * 1024)
772 return 0;
773
774 psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
775 if (psb_period)
776 buf_size = psb_period * 2;
777
778 if (!buf_size || buf_size > max_size)
779 buf_size = max_size;
780
781 if (buf_size >= snapshot_buf_size)
782 return 0;
783
784 if (buf_size >= ptr->snapshot_size / 2)
785 return 0;
786
787 return buf_size;
788}
789
790static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
791 size_t snapshot_buf_size)
792{
793 if (ptr->snapshot_init_done)
794 return 0;
795
796 ptr->snapshot_init_done = true;
797
798 ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
799 snapshot_buf_size);
800
801 return 0;
802}
803
804/**
805 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
806 * @buf1: first buffer
807 * @compare_size: number of bytes to compare
808 * @buf2: second buffer (a circular buffer)
809 * @offs2: offset in second buffer
810 * @buf2_size: size of second buffer
811 *
812 * The comparison allows for the possibility that the bytes to compare in the
813 * circular buffer are not contiguous. It is assumed that @compare_size <=
814 * @buf2_size. This function returns %false if the bytes are identical, %true
815 * otherwise.
816 */
817static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
818 void *buf2, size_t offs2, size_t buf2_size)
819{
820 size_t end2 = offs2 + compare_size, part_size;
821
822 if (end2 <= buf2_size)
823 return memcmp(buf1, buf2 + offs2, compare_size);
824
825 part_size = end2 - buf2_size;
826 if (memcmp(buf1, buf2 + offs2, part_size))
827 return true;
828
829 compare_size -= part_size;
830
831 return memcmp(buf1 + part_size, buf2, compare_size);
832}
833
834static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
835 size_t ref_size, size_t buf_size,
836 void *data, size_t head)
837{
838 size_t ref_end = ref_offset + ref_size;
839
840 if (ref_end > buf_size) {
841 if (head > ref_offset || head < ref_end - buf_size)
842 return true;
843 } else if (head > ref_offset && head < ref_end) {
844 return true;
845 }
846
847 return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
848 buf_size);
849}
850
851static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
852 void *data, size_t head)
853{
854 if (head >= ref_size) {
855 memcpy(ref_buf, data + head - ref_size, ref_size);
856 } else {
857 memcpy(ref_buf, data, head);
858 ref_size -= head;
859 memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
860 }
861}
862
863static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
864 struct auxtrace_mmap *mm, unsigned char *data,
865 u64 head)
866{
867 struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
868 bool wrapped;
869
870 wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
871 ptr->snapshot_ref_buf_size, mm->len,
872 data, head);
873
874 intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
875 data, head);
876
877 return wrapped;
878}
879
880static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
881{
882 int i, a, b;
883
884 b = buf_size >> 3;
885 a = b - 512;
886 if (a < 0)
887 a = 0;
888
889 for (i = a; i < b; i++) {
890 if (data[i])
891 return true;
892 }
893
894 return false;
895}
896
897static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
898 struct auxtrace_mmap *mm, unsigned char *data,
899 u64 *head, u64 *old)
900{
901 struct intel_pt_recording *ptr =
902 container_of(itr, struct intel_pt_recording, itr);
903 bool wrapped;
904 int err;
905
906 pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
907 __func__, idx, (size_t)*old, (size_t)*head);
908
909 err = intel_pt_snapshot_init(ptr, mm->len);
910 if (err)
911 goto out_err;
912
913 if (idx >= ptr->snapshot_ref_cnt) {
914 err = intel_pt_alloc_snapshot_refs(ptr, idx);
915 if (err)
916 goto out_err;
917 }
918
919 if (ptr->snapshot_ref_buf_size) {
920 if (!ptr->snapshot_refs[idx].ref_buf) {
921 err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
922 if (err)
923 goto out_err;
924 }
925 wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
926 } else {
927 wrapped = ptr->snapshot_refs[idx].wrapped;
928 if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
929 ptr->snapshot_refs[idx].wrapped = true;
930 wrapped = true;
931 }
932 }
933
934 /*
935 * In full trace mode 'head' continually increases. However in snapshot
936 * mode 'head' is an offset within the buffer. Here 'old' and 'head'
937 * are adjusted to match the full trace case which expects that 'old' is
938 * always less than 'head'.
939 */
940 if (wrapped) {
941 *old = *head;
942 *head += mm->len;
943 } else {
944 if (mm->mask)
945 *old &= mm->mask;
946 else
947 *old %= mm->len;
948 if (*old > *head)
949 *head += mm->len;
950 }
951
952 pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
953 __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
954
955 return 0;
956
957out_err:
958 pr_err("%s: failed, error %d\n", __func__, err);
959 return err;
960}
961
962static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
963{
964 return rdtsc();
965}
966
967static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
968{
969 struct intel_pt_recording *ptr =
970 container_of(itr, struct intel_pt_recording, itr);
971 struct perf_evsel *evsel;
972
973 evlist__for_each(ptr->evlist, evsel) {
974 if (evsel->attr.type == ptr->intel_pt_pmu->type)
975 return perf_evlist__enable_event_idx(ptr->evlist, evsel,
976 idx);
977 }
978 return -EINVAL;
979}
980
981struct auxtrace_record *intel_pt_recording_init(int *err)
982{
983 struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
984 struct intel_pt_recording *ptr;
985
986 if (!intel_pt_pmu)
987 return NULL;
988
989 ptr = zalloc(sizeof(struct intel_pt_recording));
990 if (!ptr) {
991 *err = -ENOMEM;
992 return NULL;
993 }
994
995 ptr->intel_pt_pmu = intel_pt_pmu;
996 ptr->itr.recording_options = intel_pt_recording_options;
997 ptr->itr.info_priv_size = intel_pt_info_priv_size;
998 ptr->itr.info_fill = intel_pt_info_fill;
999 ptr->itr.free = intel_pt_recording_free;
1000 ptr->itr.snapshot_start = intel_pt_snapshot_start;
1001 ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
1002 ptr->itr.find_snapshot = intel_pt_find_snapshot;
1003 ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
1004 ptr->itr.reference = intel_pt_reference;
1005 ptr->itr.read_finish = intel_pt_read_finish;
1006 return &ptr->itr;
1007}
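
intel_pt_pick_bit() above warrants a worked example: given a bitmask of supported period exponents (e.g. from caps/mtc_periods), it returns the highest set bit at or below the target, falling back to the first set bit above the target when no smaller one is supported; intel_pt_default_config() uses it with target 3 to pick the default mtc_period and psb_period values. A self-contained sketch with illustrative masks (the mask values are hypothetical, not read from real hardware):

#include <stdio.h>

/* Same logic as intel_pt_pick_bit() above: scan from the LSB, remember the
 * last set bit at or below the target, stop at the first set bit at or
 * above it. */
static int pick_bit(int bits, int target)
{
	int pos, pick = -1;

	for (pos = 0; bits; bits >>= 1, pos++) {
		if (bits & 1) {
			if (pos <= target || pick < 0)
				pick = pos;
			if (pos >= target)
				break;
		}
	}
	return pick;
}

int main(void)
{
	/* e.g. caps/mtc_periods = 0x249: bits 0, 3, 6, 9 supported */
	printf("%d\n", pick_bit(0x249, 3));	/* -> 3 (exact match) */
	printf("%d\n", pick_bit(0x241, 3));	/* bits 0,6,9: -> 0 (highest <= 3) */
	printf("%d\n", pick_bit(0x200, 3));	/* only bit 9: -> 9 (first above) */
	return 0;
}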
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
new file mode 100644
index 000000000000..79fe07158d00
--- /dev/null
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -0,0 +1,18 @@
1#include <string.h>
2
3#include <linux/perf_event.h>
4
5#include "../../util/intel-pt.h"
6#include "../../util/intel-bts.h"
7#include "../../util/pmu.h"
8
9struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
10{
11#ifdef HAVE_AUXTRACE_SUPPORT
12 if (!strcmp(pmu->name, INTEL_PT_PMU_NAME))
13 return intel_pt_pmu_default_config(pmu);
14 if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME))
15 pmu->selectable = true;
16#endif
17 return NULL;
18}
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index c3ab760e06b4..573e28896038 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -5,6 +5,7 @@ perf-y += futex-hash.o
 perf-y += futex-wake.o
 perf-y += futex-wake-parallel.o
 perf-y += futex-requeue.o
+perf-y += futex-lock-pi.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 70b2f718cc21..a50df86f2b9b 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -36,6 +36,8 @@ extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
 extern int bench_futex_wake_parallel(int argc, const char **argv,
 				     const char *prefix);
 extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
+/* pi futexes */
+extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR "default"
 #define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
new file mode 100644
index 000000000000..bc6a16adbca8
--- /dev/null
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -0,0 +1,219 @@
1/*
2 * Copyright (C) 2015 Davidlohr Bueso.
3 */
4
5#include "../perf.h"
6#include "../util/util.h"
7#include "../util/stat.h"
8#include "../util/parse-options.h"
9#include "../util/header.h"
10#include "bench.h"
11#include "futex.h"
12
13#include <err.h>
14#include <stdlib.h>
15#include <sys/time.h>
16#include <pthread.h>
17
18struct worker {
19 int tid;
20 u_int32_t *futex;
21 pthread_t thread;
22 unsigned long ops;
23};
24
25static u_int32_t global_futex = 0;
26static struct worker *worker;
27static unsigned int nsecs = 10;
28static bool silent = false, multi = false;
29static bool done = false, fshared = false;
30static unsigned int ncpus, nthreads = 0;
31static int futex_flag = 0;
32struct timeval start, end, runtime;
33static pthread_mutex_t thread_lock;
34static unsigned int threads_starting;
35static struct stats throughput_stats;
36static pthread_cond_t thread_parent, thread_worker;
37
38static const struct option options[] = {
39 OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
40 OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
41 OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"),
42 OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
43 OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
44 OPT_END()
45};
46
47static const char * const bench_futex_lock_pi_usage[] = {
48 "perf bench futex requeue <options>",
49 NULL
50};
51
52static void print_summary(void)
53{
54 unsigned long avg = avg_stats(&throughput_stats);
55 double stddev = stddev_stats(&throughput_stats);
56
57 printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
58 !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
59 (int) runtime.tv_sec);
60}
61
62static void toggle_done(int sig __maybe_unused,
63 siginfo_t *info __maybe_unused,
64 void *uc __maybe_unused)
65{
66 /* inform all threads that we're done for the day */
67 done = true;
68 gettimeofday(&end, NULL);
69 timersub(&end, &start, &runtime);
70}
71
72static void *workerfn(void *arg)
73{
74 struct worker *w = (struct worker *) arg;
75
76 pthread_mutex_lock(&thread_lock);
77 threads_starting--;
78 if (!threads_starting)
79 pthread_cond_signal(&thread_parent);
80 pthread_cond_wait(&thread_worker, &thread_lock);
81 pthread_mutex_unlock(&thread_lock);
82
83 do {
84 int ret;
85 again:
86 ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
87
88 if (ret) { /* handle lock acquisition */
89 if (!silent)
90 warn("thread %d: Could not lock pi-lock for %p (%d)",
91 w->tid, w->futex, ret);
92 if (done)
93 break;
94
95 goto again;
96 }
97
98 usleep(1);
99 ret = futex_unlock_pi(w->futex, futex_flag);
100 if (ret && !silent)
101 warn("thread %d: Could not unlock pi-lock for %p (%d)",
102 w->tid, w->futex, ret);
103 w->ops++; /* account for thread's share of work */
104 } while (!done);
105
106 return NULL;
107}
108
109static void create_threads(struct worker *w, pthread_attr_t thread_attr)
110{
111 cpu_set_t cpu;
112 unsigned int i;
113
114 threads_starting = nthreads;
115
116 for (i = 0; i < nthreads; i++) {
117 worker[i].tid = i;
118
119 if (multi) {
120 worker[i].futex = calloc(1, sizeof(u_int32_t));
121 if (!worker[i].futex)
122 err(EXIT_FAILURE, "calloc");
123 } else
124 worker[i].futex = &global_futex;
125
126 CPU_ZERO(&cpu);
127 CPU_SET(i % ncpus, &cpu);
128
129 if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
130 err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
131
132 if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
133 err(EXIT_FAILURE, "pthread_create");
134 }
135}
136
137int bench_futex_lock_pi(int argc, const char **argv,
138 const char *prefix __maybe_unused)
139{
140 int ret = 0;
141 unsigned int i;
142 struct sigaction act;
143 pthread_attr_t thread_attr;
144
145 argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
146 if (argc)
147 goto err;
148
149 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
150
151 sigfillset(&act.sa_mask);
152 act.sa_sigaction = toggle_done;
153 sigaction(SIGINT, &act, NULL);
154
155 if (!nthreads)
156 nthreads = ncpus;
157
158 worker = calloc(nthreads, sizeof(*worker));
159 if (!worker)
160 err(EXIT_FAILURE, "calloc");
161
162 if (!fshared)
163 futex_flag = FUTEX_PRIVATE_FLAG;
164
165 printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n",
166 getpid(), nthreads, nsecs);
167
168 init_stats(&throughput_stats);
169 pthread_mutex_init(&thread_lock, NULL);
170 pthread_cond_init(&thread_parent, NULL);
171 pthread_cond_init(&thread_worker, NULL);
172
173 threads_starting = nthreads;
174 pthread_attr_init(&thread_attr);
175 gettimeofday(&start, NULL);
176
177 create_threads(worker, thread_attr);
178 pthread_attr_destroy(&thread_attr);
179
180 pthread_mutex_lock(&thread_lock);
181 while (threads_starting)
182 pthread_cond_wait(&thread_parent, &thread_lock);
183 pthread_cond_broadcast(&thread_worker);
184 pthread_mutex_unlock(&thread_lock);
185
186 sleep(nsecs);
187 toggle_done(0, NULL, NULL);
188
189 for (i = 0; i < nthreads; i++) {
190 ret = pthread_join(worker[i].thread, NULL);
191 if (ret)
192 err(EXIT_FAILURE, "pthread_join");
193 }
194
195 /* cleanup & report results */
196 pthread_cond_destroy(&thread_parent);
197 pthread_cond_destroy(&thread_worker);
198 pthread_mutex_destroy(&thread_lock);
199
200 for (i = 0; i < nthreads; i++) {
201 unsigned long t = worker[i].ops/runtime.tv_sec;
202
203 update_stats(&throughput_stats, t);
204 if (!silent)
205 printf("[thread %3d] futex: %p [ %ld ops/sec ]\n",
206 worker[i].tid, worker[i].futex, t);
207
208 if (multi)
209 free(worker[i].futex);
210 }
211
212 print_summary();
213
214 free(worker);
215 return ret;
216err:
217 usage_with_options(bench_futex_lock_pi_usage, options);
218 exit(EXIT_FAILURE);
219}
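
Once built, the new benchmark is driven like the other futex benchmarks. A hypothetical invocation (thread count chosen for illustration; measured figures elided since they depend entirely on the machine) produces output shaped by the printf()/print_summary() calls above:

  $ perf bench futex lock-pi -t 8 -r 10
  # Running 'futex/lock-pi' benchmark:
  Run summary [PID <pid>]: 8 threads doing pi lock/unlock pairing for 10 secs.

  [thread   0] futex: 0x... [ <ops> ops/sec ]
  ...
  Averaged <avg> operations/sec (+- <stddev>%), total secs = 10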
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 7ed22ff1e1ac..d44de9f44281 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -56,6 +56,26 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
 }
 
 /**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ * @detect: whether (1) or not (0) to perform deadlock detection
+ */
+static inline int
+futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
+	      int opflags)
+{
+	return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ */
+static inline int
+futex_unlock_pi(u_int32_t *uaddr, int opflags)
+{
+	return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
  * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
  * @nr_wake: wake up to this many tasks
  * @nr_requeue: requeue up to this many tasks
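
These wrappers funnel into the futex() helper defined earlier in this header, which wraps the raw system call. For readers outside the perf tree, a minimal self-contained sketch of the same FUTEX_LOCK_PI/FUTEX_UNLOCK_PI pair issued through syscall(2) — uncontended single-threaded case, error handling reduced to perror(), private futex assumed:

#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static uint32_t lock;	/* 0 = unlocked; the kernel stores the owner's TID */

static long sys_futex(uint32_t *uaddr, int op, uint32_t val,
		      const struct timespec *timeout)
{
	return syscall(SYS_futex, uaddr, op, val, timeout, NULL, 0);
}

int main(void)
{
	/* FUTEX_LOCK_PI writes our TID into the futex word and applies
	 * priority inheritance to any task that later blocks on it. */
	if (sys_futex(&lock, FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG, 0, NULL))
		perror("FUTEX_LOCK_PI");
	else if (sys_futex(&lock, FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG, 0, NULL))
		perror("FUTEX_UNLOCK_PI");
	return 0;
}

Under contention the kernel queues waiters in priority order and boosts the lock holder, which is what distinguishes these two ops from plain FUTEX_WAIT/FUTEX_WAKE.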
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 2c1bec39c30e..8edc205ff9a7 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -67,6 +67,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 			rb_erase(&al->sym->rb_node,
 				 &al->map->dso->symbols[al->map->type]);
 			symbol__delete(al->sym);
+			dso__reset_find_symbol_cache(al->map->dso);
 		}
 		return 0;
 	}
@@ -187,6 +188,7 @@ find_next:
 		 * symbol, free he->ms.sym->src to signal we already
 		 * processed this symbol.
 		 */
+		zfree(&notes->src->cycles_hist);
 		zfree(&notes->src);
 	}
 }
@@ -238,6 +240,8 @@ static int __cmd_annotate(struct perf_annotate *ann)
 	if (nr_samples > 0) {
 		total_nr_samples += nr_samples;
 		hists__collapse_resort(hists, NULL);
+		/* Don't sort callchain */
+		perf_evsel__reset_sample_bit(pos, CALLCHAIN);
 		hists__output_resort(hists, NULL);
 
 		if (symbol_conf.event_group &&
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b5314e452ec7..f67934d46d40 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -60,6 +60,8 @@ static struct bench futex_benchmarks[] = {
60 { "wake", "Benchmark for futex wake calls", bench_futex_wake }, 60 { "wake", "Benchmark for futex wake calls", bench_futex_wake },
61 { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel }, 61 { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel },
62 { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, 62 { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
63 /* pi-futexes */
64 { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi },
63 { "all", "Test all futex benchmarks", NULL }, 65 { "all", "Test all futex benchmarks", NULL },
64 { NULL, NULL, NULL } 66 { NULL, NULL, NULL }
65}; 67};
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index d47a0cdc71c9..7b8450cd33c2 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -25,8 +25,6 @@
 static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
 {
 	char root_dir[PATH_MAX];
-	char notes[PATH_MAX];
-	u8 build_id[BUILD_ID_SIZE];
 	char *p;
 
 	strlcpy(root_dir, proc_dir, sizeof(root_dir));
@@ -35,15 +33,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
 	if (!p)
 		return -1;
 	*p = '\0';
-
-	scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
-
-	if (sysfs__read_build_id(notes, build_id, sizeof(build_id)))
-		return -1;
-
-	build_id__sprintf(build_id, sizeof(build_id), sbuildid);
-
-	return 0;
+	return sysfs__sprintf_build_id(root_dir, sbuildid);
 }
 
 static int build_id_cache__kcore_dir(char *dir, size_t sz)
@@ -127,7 +117,7 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
 
 static int build_id_cache__add_kcore(const char *filename, bool force)
 {
-	char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1];
+	char dir[32], sbuildid[SBUILD_ID_SIZE];
 	char from_dir[PATH_MAX], to_dir[PATH_MAX];
 	char *p;
 
@@ -138,7 +128,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force)
138 return -1; 128 return -1;
139 *p = '\0'; 129 *p = '\0';
140 130
141 if (build_id_cache__kcore_buildid(from_dir, sbuildid)) 131 if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0)
142 return -1; 132 return -1;
143 133
144 scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", 134 scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s",
@@ -184,7 +174,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force)
184 174
185static int build_id_cache__add_file(const char *filename) 175static int build_id_cache__add_file(const char *filename)
186{ 176{
187 char sbuild_id[BUILD_ID_SIZE * 2 + 1]; 177 char sbuild_id[SBUILD_ID_SIZE];
188 u8 build_id[BUILD_ID_SIZE]; 178 u8 build_id[BUILD_ID_SIZE];
189 int err; 179 int err;
190 180
@@ -204,7 +194,7 @@ static int build_id_cache__add_file(const char *filename)
204static int build_id_cache__remove_file(const char *filename) 194static int build_id_cache__remove_file(const char *filename)
205{ 195{
206 u8 build_id[BUILD_ID_SIZE]; 196 u8 build_id[BUILD_ID_SIZE];
207 char sbuild_id[BUILD_ID_SIZE * 2 + 1]; 197 char sbuild_id[SBUILD_ID_SIZE];
208 198
209 int err; 199 int err;
210 200
@@ -276,7 +266,7 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f
276static int build_id_cache__update_file(const char *filename) 266static int build_id_cache__update_file(const char *filename)
277{ 267{
278 u8 build_id[BUILD_ID_SIZE]; 268 u8 build_id[BUILD_ID_SIZE];
279 char sbuild_id[BUILD_ID_SIZE * 2 + 1]; 269 char sbuild_id[SBUILD_ID_SIZE];
280 270
281 int err = 0; 271 int err = 0;
282 272
@@ -363,7 +353,7 @@ int cmd_buildid_cache(int argc, const char **argv,
363 setup_pager(); 353 setup_pager();
364 354
365 if (add_name_list_str) { 355 if (add_name_list_str) {
366 list = strlist__new(true, add_name_list_str); 356 list = strlist__new(add_name_list_str, NULL);
367 if (list) { 357 if (list) {
368 strlist__for_each(pos, list) 358 strlist__for_each(pos, list)
369 if (build_id_cache__add_file(pos->s)) { 359 if (build_id_cache__add_file(pos->s)) {
@@ -381,7 +371,7 @@ int cmd_buildid_cache(int argc, const char **argv,
381 } 371 }
382 372
383 if (remove_name_list_str) { 373 if (remove_name_list_str) {
384 list = strlist__new(true, remove_name_list_str); 374 list = strlist__new(remove_name_list_str, NULL);
385 if (list) { 375 if (list) {
386 strlist__for_each(pos, list) 376 strlist__for_each(pos, list)
387 if (build_id_cache__remove_file(pos->s)) { 377 if (build_id_cache__remove_file(pos->s)) {
@@ -399,7 +389,7 @@ int cmd_buildid_cache(int argc, const char **argv,
399 } 389 }
400 390
401 if (purge_name_list_str) { 391 if (purge_name_list_str) {
402 list = strlist__new(true, purge_name_list_str); 392 list = strlist__new(purge_name_list_str, NULL);
403 if (list) { 393 if (list) {
404 strlist__for_each(pos, list) 394 strlist__for_each(pos, list)
405 if (build_id_cache__purge_path(pos->s)) { 395 if (build_id_cache__purge_path(pos->s)) {
@@ -420,7 +410,7 @@ int cmd_buildid_cache(int argc, const char **argv,
420 ret = build_id_cache__fprintf_missing(session, stdout); 410 ret = build_id_cache__fprintf_missing(session, stdout);
421 411
422 if (update_name_list_str) { 412 if (update_name_list_str) {
423 list = strlist__new(true, update_name_list_str); 413 list = strlist__new(update_name_list_str, NULL);
424 if (list) { 414 if (list) {
425 strlist__for_each(pos, list) 415 strlist__for_each(pos, list)
426 if (build_id_cache__update_file(pos->s)) { 416 if (build_id_cache__update_file(pos->s)) {
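Several hunks here replace the hand-rolled BUILD_ID_SIZE * 2 + 1 with a named SBUILD_ID_SIZE: a 20-byte build-id prints as two hex digits per byte plus a trailing NUL. A sketch of the byte-to-hex conversion those buffer sizes account for (an assumed helper for illustration, not perf's build_id__sprintf()):

    #include <stdio.h>

    #define BUILD_ID_SIZE  20                        /* raw SHA-1 bytes */
    #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1)   /* hex digits + NUL */

    static int sbuild_id_from_bytes(const unsigned char *build_id,
                                    char bf[SBUILD_ID_SIZE])
    {
            char *p = bf;
            int i;

            for (i = 0; i < BUILD_ID_SIZE; i++)
                    p += sprintf(p, "%02x", build_id[i]);

            return (int)(p - bf);   /* characters written, NUL excluded */
    }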
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 9fe93c8d4fcf..918b4de29de4 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -19,29 +19,25 @@
19 19
20static int sysfs__fprintf_build_id(FILE *fp) 20static int sysfs__fprintf_build_id(FILE *fp)
21{ 21{
22 u8 kallsyms_build_id[BUILD_ID_SIZE]; 22 char sbuild_id[SBUILD_ID_SIZE];
23 char sbuild_id[BUILD_ID_SIZE * 2 + 1]; 23 int ret;
24 24
25 if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id, 25 ret = sysfs__sprintf_build_id("/", sbuild_id);
26 sizeof(kallsyms_build_id)) != 0) 26 if (ret != sizeof(sbuild_id))
27 return -1; 27 return ret < 0 ? ret : -EINVAL;
28 28
29 build_id__sprintf(kallsyms_build_id, sizeof(kallsyms_build_id), 29 return fprintf(fp, "%s\n", sbuild_id);
30 sbuild_id);
31 fprintf(fp, "%s\n", sbuild_id);
32 return 0;
33} 30}
34 31
35static int filename__fprintf_build_id(const char *name, FILE *fp) 32static int filename__fprintf_build_id(const char *name, FILE *fp)
36{ 33{
37 u8 build_id[BUILD_ID_SIZE]; 34 char sbuild_id[SBUILD_ID_SIZE];
38 char sbuild_id[BUILD_ID_SIZE * 2 + 1]; 35 int ret;
39 36
40 if (filename__read_build_id(name, build_id, 37 ret = filename__sprintf_build_id(name, sbuild_id);
41 sizeof(build_id)) != sizeof(build_id)) 38 if (ret != sizeof(sbuild_id))
42 return 0; 39 return ret < 0 ? ret : -EINVAL;
43 40
44 build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
45 return fprintf(fp, "%s\n", sbuild_id); 41 return fprintf(fp, "%s\n", sbuild_id);
46} 42}
47 43
@@ -63,7 +59,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
63 /* 59 /*
64 * See if this is an ELF file first: 60 * See if this is an ELF file first:
65 */ 61 */
66 if (filename__fprintf_build_id(input_name, stdout)) 62 if (filename__fprintf_build_id(input_name, stdout) > 0)
67 goto out; 63 goto out;
68 64
69 session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); 65 session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index daaa7dca9c3b..0b180a885ba3 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -722,6 +722,9 @@ static void data_process(void)
722 if (verbose || data__files_cnt > 2) 722 if (verbose || data__files_cnt > 2)
723 data__fprintf(); 723 data__fprintf();
724 724
725 /* Don't sort callchain for perf diff */
726 perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
727
725 hists__process(hists_base); 728 hists__process(hists_base);
726 } 729 }
727} 730}
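As in the annotate hunk earlier, perf diff clears the callchain bit before resorting ("Don't sort callchain") so the histogram ordering ignores callchain data. At bottom, resetting a sample bit is a bitwise AND against a PERF_SAMPLE_* mask; a self-contained sketch with the UAPI constants (perf_evsel__reset_sample_bit() applies essentially this per event):

    #include <linux/perf_event.h>
    #include <stdio.h>

    static void reset_sample_bit(struct perf_event_attr *attr, __u64 bit)
    {
            attr->sample_type &= ~bit;      /* drop one PERF_SAMPLE_* flag */
    }

    int main(void)
    {
            struct perf_event_attr attr = {
                    .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN,
            };

            reset_sample_bit(&attr, PERF_SAMPLE_CALLCHAIN);
            printf("callchain bit is %s\n",
                   (attr.sample_type & PERF_SAMPLE_CALLCHAIN) ? "set" : "clear");
            return 0;
    }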
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 01b06492bd6a..f62c49b35be0 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -561,6 +561,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
561 .lost = perf_event__repipe, 561 .lost = perf_event__repipe,
562 .aux = perf_event__repipe, 562 .aux = perf_event__repipe,
563 .itrace_start = perf_event__repipe, 563 .itrace_start = perf_event__repipe,
564 .context_switch = perf_event__repipe,
564 .read = perf_event__repipe_sample, 565 .read = perf_event__repipe_sample,
565 .throttle = perf_event__repipe, 566 .throttle = perf_event__repipe,
566 .unthrottle = perf_event__repipe, 567 .unthrottle = perf_event__repipe,
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 1272559fa22d..b81cec33b4b2 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -297,8 +297,7 @@ static void cleanup_params(void)
297 clear_perf_probe_event(params.events + i); 297 clear_perf_probe_event(params.events + i);
298 line_range__clear(&params.line_range); 298 line_range__clear(&params.line_range);
299 free(params.target); 299 free(params.target);
300 if (params.filter) 300 strfilter__delete(params.filter);
301 strfilter__delete(params.filter);
302 memset(&params, 0, sizeof(params)); 301 memset(&params, 0, sizeof(params));
303} 302}
304 303
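The builtin-probe.c cleanup can drop its NULL guard because strfilter__delete() tolerates NULL, the same convention free(3) follows. A generic sketch of that destructor shape (illustrative, not the strfilter implementation):

    #include <stdlib.h>

    struct filter_sketch { char *pattern; };

    static void filter_sketch__delete(struct filter_sketch *filter)
    {
            if (filter == NULL)     /* accept NULL, like free(3) */
                    return;
            free(filter->pattern);
            free(filter);
    }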
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 20b56eb987f8..a660022f2c92 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -771,12 +771,14 @@ static void callchain_debug(void)
771 callchain_param.dump_size); 771 callchain_param.dump_size);
772} 772}
773 773
774int record_parse_callchain_opt(const struct option *opt __maybe_unused, 774int record_parse_callchain_opt(const struct option *opt,
775 const char *arg, 775 const char *arg,
776 int unset) 776 int unset)
777{ 777{
778 int ret; 778 int ret;
779 struct record_opts *record = (struct record_opts *)opt->value;
779 780
781 record->callgraph_set = true;
780 callchain_param.enabled = !unset; 782 callchain_param.enabled = !unset;
781 783
782 /* --no-call-graph */ 784 /* --no-call-graph */
@@ -786,17 +788,20 @@ int record_parse_callchain_opt(const struct option *opt __maybe_unused,
786 return 0; 788 return 0;
787 } 789 }
788 790
789 ret = parse_callchain_record_opt(arg); 791 ret = parse_callchain_record_opt(arg, &callchain_param);
790 if (!ret) 792 if (!ret)
791 callchain_debug(); 793 callchain_debug();
792 794
793 return ret; 795 return ret;
794} 796}
795 797
796int record_callchain_opt(const struct option *opt __maybe_unused, 798int record_callchain_opt(const struct option *opt,
797 const char *arg __maybe_unused, 799 const char *arg __maybe_unused,
798 int unset __maybe_unused) 800 int unset __maybe_unused)
799{ 801{
802 struct record_opts *record = (struct record_opts *)opt->value;
803
804 record->callgraph_set = true;
800 callchain_param.enabled = true; 805 callchain_param.enabled = true;
801 806
802 if (callchain_param.record_mode == CALLCHAIN_NONE) 807 if (callchain_param.record_mode == CALLCHAIN_NONE)
@@ -1003,6 +1008,9 @@ struct option __record_options[] = {
1003 parse_events_option), 1008 parse_events_option),
1004 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 1009 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1005 "event filter", parse_filter), 1010 "event filter", parse_filter),
1011 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1012 NULL, "don't record events from perf itself",
1013 exclude_perf),
1006 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 1014 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1007 "record events on existing process id"), 1015 "record events on existing process id"),
1008 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 1016 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
@@ -1041,7 +1049,9 @@ struct option __record_options[] = {
1041 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 1049 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1042 "per thread counts"), 1050 "per thread counts"),
1043 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 1051 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1044 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Record the sample timestamps"), 1052 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1053 &record.opts.sample_time_set,
1054 "Record the sample timestamps"),
1045 OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), 1055 OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1046 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 1056 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1047 "don't sample"), 1057 "don't sample"),
@@ -1081,6 +1091,8 @@ struct option __record_options[] = {
1081 "opts", "AUX area tracing Snapshot Mode", ""), 1091 "opts", "AUX area tracing Snapshot Mode", ""),
1082 OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, 1092 OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1083 "per thread proc mmap processing timeout in ms"), 1093 "per thread proc mmap processing timeout in ms"),
1094 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1095 "Record context switch events"),
1084 OPT_END() 1096 OPT_END()
1085}; 1097};
1086 1098
@@ -1108,6 +1120,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1108 " system-wide mode\n"); 1120 " system-wide mode\n");
1109 usage_with_options(record_usage, record_options); 1121 usage_with_options(record_usage, record_options);
1110 } 1122 }
1123 if (rec->opts.record_switch_events &&
1124 !perf_can_record_switch_events()) {
1125 ui__error("kernel does not support recording context switch events (--switch-events option)\n");
1126 usage_with_options(record_usage, record_options);
1127 }
1111 1128
1112 if (!rec->itr) { 1129 if (!rec->itr) {
1113 rec->itr = auxtrace_record__init(rec->evlist, &err); 1130 rec->itr = auxtrace_record__init(rec->evlist, &err);
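--switch-events is refused up front when the kernel cannot emit PERF_RECORD_SWITCH. One plausible way to probe for that, in the spirit of perf_can_record_switch_events(): open a software dummy event with the context_switch bit set and see whether perf_event_open() accepts it (a sketch assuming UAPI headers new enough to define the bit; the real helper lives elsewhere in perf):

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int can_record_switch_events(void)
    {
            struct perf_event_attr attr;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size           = sizeof(attr);
            attr.type           = PERF_TYPE_SOFTWARE;
            attr.config         = PERF_COUNT_SW_DUMMY;
            attr.context_switch = 1;    /* rejected by kernels without PERF_RECORD_SWITCH */

            fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
            if (fd < 0)
                    return 0;
            close(fd);
            return 1;
    }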
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 95a47719aec3..62b285e32aa5 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -53,6 +53,7 @@ struct report {
53 bool mem_mode; 53 bool mem_mode;
54 bool header; 54 bool header;
55 bool header_only; 55 bool header_only;
56 bool nonany_branch_mode;
56 int max_stack; 57 int max_stack;
57 struct perf_read_values show_threads_values; 58 struct perf_read_values show_threads_values;
58 const char *pretty_printing_style; 59 const char *pretty_printing_style;
@@ -102,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
102 if (!ui__has_annotation()) 103 if (!ui__has_annotation())
103 return 0; 104 return 0;
104 105
106 hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
107 rep->nonany_branch_mode);
108
105 if (sort__mode == SORT_MODE__BRANCH) { 109 if (sort__mode == SORT_MODE__BRANCH) {
106 bi = he->branch_info; 110 bi = he->branch_info;
107 err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); 111 err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
@@ -258,6 +262,12 @@ static int report__setup_sample_type(struct report *rep)
258 else 262 else
259 callchain_param.record_mode = CALLCHAIN_FP; 263 callchain_param.record_mode = CALLCHAIN_FP;
260 } 264 }
265
266 /* ??? handle more cases than just ANY? */
267 if (!(perf_evlist__combined_branch_type(session->evlist) &
268 PERF_SAMPLE_BRANCH_ANY))
269 rep->nonany_branch_mode = true;
270
261 return 0; 271 return 0;
262} 272}
263 273
@@ -306,6 +316,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
306 if (evname != NULL) 316 if (evname != NULL)
307 ret += fprintf(fp, " of event '%s'", evname); 317 ret += fprintf(fp, " of event '%s'", evname);
308 318
319 if (symbol_conf.show_ref_callgraph &&
320 strstr(evname, "call-graph=no")) {
321 ret += fprintf(fp, ", show reference callgraph");
322 }
323
309 if (rep->mem_mode) { 324 if (rep->mem_mode) {
310 ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); 325 ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
311 ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order); 326 ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order);
@@ -728,6 +743,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
728 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", 743 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
729 "Instruction Tracing options", 744 "Instruction Tracing options",
730 itrace_parse_synth_opts), 745 itrace_parse_synth_opts),
746 OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
747 "Show full source file name path for source lines"),
748 OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
749 "Show callgraph from reference event"),
731 OPT_END() 750 OPT_END()
732 }; 751 };
733 struct perf_data_file file = { 752 struct perf_data_file file = {
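nonany_branch_mode is computed once per session by OR-ing every event's branch sample type and testing PERF_SAMPLE_BRANCH_ANY; hist__account_cycles() uses it to decide how much it can trust the branch records when attributing cycle counts (the "??? handle more cases" comment marks the test as deliberately coarse). A sketch of that combined-mask test (hypothetical helper standing in for perf_evlist__combined_branch_type()):

    #include <linux/perf_event.h>
    #include <stdbool.h>
    #include <stddef.h>

    static bool branch_mode_is_nonany(const __u64 *branch_types, size_t nr)
    {
            __u64 combined = 0;
            size_t i;

            for (i = 0; i < nr; i++)
                    combined |= branch_types[i];    /* union over all events */

            return !(combined & PERF_SAMPLE_BRANCH_ANY);
    }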
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 24809787369f..4430340292c0 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -623,6 +623,7 @@ struct perf_script {
623 struct perf_session *session; 623 struct perf_session *session;
624 bool show_task_events; 624 bool show_task_events;
625 bool show_mmap_events; 625 bool show_mmap_events;
626 bool show_switch_events;
626}; 627};
627 628
628static int process_attr(struct perf_tool *tool, union perf_event *event, 629static int process_attr(struct perf_tool *tool, union perf_event *event,
@@ -661,7 +662,7 @@ static int process_comm_event(struct perf_tool *tool,
661 struct thread *thread; 662 struct thread *thread;
662 struct perf_script *script = container_of(tool, struct perf_script, tool); 663 struct perf_script *script = container_of(tool, struct perf_script, tool);
663 struct perf_session *session = script->session; 664 struct perf_session *session = script->session;
664 struct perf_evsel *evsel = perf_evlist__first(session->evlist); 665 struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
665 int ret = -1; 666 int ret = -1;
666 667
667 thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid); 668 thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid);
@@ -695,7 +696,7 @@ static int process_fork_event(struct perf_tool *tool,
695 struct thread *thread; 696 struct thread *thread;
696 struct perf_script *script = container_of(tool, struct perf_script, tool); 697 struct perf_script *script = container_of(tool, struct perf_script, tool);
697 struct perf_session *session = script->session; 698 struct perf_session *session = script->session;
698 struct perf_evsel *evsel = perf_evlist__first(session->evlist); 699 struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
699 700
700 if (perf_event__process_fork(tool, event, sample, machine) < 0) 701 if (perf_event__process_fork(tool, event, sample, machine) < 0)
701 return -1; 702 return -1;
@@ -727,7 +728,7 @@ static int process_exit_event(struct perf_tool *tool,
727 struct thread *thread; 728 struct thread *thread;
728 struct perf_script *script = container_of(tool, struct perf_script, tool); 729 struct perf_script *script = container_of(tool, struct perf_script, tool);
729 struct perf_session *session = script->session; 730 struct perf_session *session = script->session;
730 struct perf_evsel *evsel = perf_evlist__first(session->evlist); 731 struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
731 732
732 thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); 733 thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
733 if (thread == NULL) { 734 if (thread == NULL) {
@@ -759,7 +760,7 @@ static int process_mmap_event(struct perf_tool *tool,
759 struct thread *thread; 760 struct thread *thread;
760 struct perf_script *script = container_of(tool, struct perf_script, tool); 761 struct perf_script *script = container_of(tool, struct perf_script, tool);
761 struct perf_session *session = script->session; 762 struct perf_session *session = script->session;
762 struct perf_evsel *evsel = perf_evlist__first(session->evlist); 763 struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
763 764
764 if (perf_event__process_mmap(tool, event, sample, machine) < 0) 765 if (perf_event__process_mmap(tool, event, sample, machine) < 0)
765 return -1; 766 return -1;
@@ -790,7 +791,7 @@ static int process_mmap2_event(struct perf_tool *tool,
790 struct thread *thread; 791 struct thread *thread;
791 struct perf_script *script = container_of(tool, struct perf_script, tool); 792 struct perf_script *script = container_of(tool, struct perf_script, tool);
792 struct perf_session *session = script->session; 793 struct perf_session *session = script->session;
793 struct perf_evsel *evsel = perf_evlist__first(session->evlist); 794 struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
794 795
795 if (perf_event__process_mmap2(tool, event, sample, machine) < 0) 796 if (perf_event__process_mmap2(tool, event, sample, machine) < 0)
796 return -1; 797 return -1;
@@ -813,6 +814,32 @@ static int process_mmap2_event(struct perf_tool *tool,
813 return 0; 814 return 0;
814} 815}
815 816
817static int process_switch_event(struct perf_tool *tool,
818 union perf_event *event,
819 struct perf_sample *sample,
820 struct machine *machine)
821{
822 struct thread *thread;
823 struct perf_script *script = container_of(tool, struct perf_script, tool);
824 struct perf_session *session = script->session;
825 struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
826
827 if (perf_event__process_switch(tool, event, sample, machine) < 0)
828 return -1;
829
830 thread = machine__findnew_thread(machine, sample->pid,
831 sample->tid);
832 if (thread == NULL) {
833 pr_debug("problem processing SWITCH event, skipping it.\n");
834 return -1;
835 }
836
837 print_sample_start(sample, thread, evsel);
838 perf_event__fprintf(event, stdout);
839 thread__put(thread);
840 return 0;
841}
842
816static void sig_handler(int sig __maybe_unused) 843static void sig_handler(int sig __maybe_unused)
817{ 844{
818 session_done = 1; 845 session_done = 1;
@@ -834,6 +861,8 @@ static int __cmd_script(struct perf_script *script)
834 script->tool.mmap = process_mmap_event; 861 script->tool.mmap = process_mmap_event;
835 script->tool.mmap2 = process_mmap2_event; 862 script->tool.mmap2 = process_mmap2_event;
836 } 863 }
864 if (script->show_switch_events)
865 script->tool.context_switch = process_switch_event;
837 866
838 ret = perf_session__process_events(script->session); 867 ret = perf_session__process_events(script->session);
839 868
@@ -1532,6 +1561,22 @@ static int have_cmd(int argc, const char **argv)
1532 return 0; 1561 return 0;
1533} 1562}
1534 1563
1564static void script__setup_sample_type(struct perf_script *script)
1565{
1566 struct perf_session *session = script->session;
1567 u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
1568
1569 if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
1570 if ((sample_type & PERF_SAMPLE_REGS_USER) &&
1571 (sample_type & PERF_SAMPLE_STACK_USER))
1572 callchain_param.record_mode = CALLCHAIN_DWARF;
1573 else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
1574 callchain_param.record_mode = CALLCHAIN_LBR;
1575 else
1576 callchain_param.record_mode = CALLCHAIN_FP;
1577 }
1578}
1579
1535int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) 1580int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
1536{ 1581{
1537 bool show_full_info = false; 1582 bool show_full_info = false;
@@ -1618,10 +1663,19 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
1618 "Show the fork/comm/exit events"), 1663 "Show the fork/comm/exit events"),
1619 OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, 1664 OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events,
1620 "Show the mmap events"), 1665 "Show the mmap events"),
1666 OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
1667 "Show context switch events (if recorded)"),
1621 OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), 1668 OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
1622 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", 1669 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
1623 "Instruction Tracing options", 1670 "Instruction Tracing options",
1624 itrace_parse_synth_opts), 1671 itrace_parse_synth_opts),
1672 OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
1673 "Show full source file name path for source lines"),
1674 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
1675 "Enable symbol demangling"),
1676 OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
1677 "Enable kernel symbol demangling"),
1678
1625 OPT_END() 1679 OPT_END()
1626 }; 1680 };
1627 const char * const script_subcommands[] = { "record", "report", NULL }; 1681 const char * const script_subcommands[] = { "record", "report", NULL };
@@ -1816,6 +1870,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
1816 goto out_delete; 1870 goto out_delete;
1817 1871
1818 script.session = session; 1872 script.session = session;
1873 script__setup_sample_type(&script);
1819 1874
1820 session->itrace_synth_opts = &itrace_synth_opts; 1875 session->itrace_synth_opts = &itrace_synth_opts;
1821 1876
@@ -1830,6 +1885,14 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
1830 else 1885 else
1831 symbol_conf.use_callchain = false; 1886 symbol_conf.use_callchain = false;
1832 1887
1888 if (session->tevent.pevent &&
1889 pevent_set_function_resolver(session->tevent.pevent,
1890 machine__resolve_kernel_addr,
1891 &session->machines.host) < 0) {
1892 pr_err("%s: failed to set libtraceevent function resolver\n", __func__);
1893 return -1;
1894 }
1895
1833 if (generate_script_lang) { 1896 if (generate_script_lang) {
1834 struct stat perf_stat; 1897 struct stat perf_stat;
1835 int input; 1898 int input;
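script__setup_sample_type() infers the unwind strategy from what was recorded: user registers plus user stack allow DWARF unwinding offline, a branch stack allows LBR-based reconstruction, and frame pointers are the fallback. The decision is just mask tests on the combined sample type; a standalone sketch:

    #include <linux/perf_event.h>

    enum callchain_mode { MODE_FP, MODE_DWARF, MODE_LBR };

    static enum callchain_mode pick_callchain_mode(__u64 sample_type)
    {
            if ((sample_type & PERF_SAMPLE_REGS_USER) &&
                (sample_type & PERF_SAMPLE_STACK_USER))
                    return MODE_DWARF;  /* enough state for offline unwind */
            if (sample_type & PERF_SAMPLE_BRANCH_STACK)
                    return MODE_LBR;    /* rebuild from branch records */
            return MODE_FP;             /* frame-pointer walk */
    }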
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d99d850e1444..d46dbb1bc65d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -58,6 +58,7 @@
58#include "util/cpumap.h" 58#include "util/cpumap.h"
59#include "util/thread.h" 59#include "util/thread.h"
60#include "util/thread_map.h" 60#include "util/thread_map.h"
61#include "util/counts.h"
61 62
62#include <stdlib.h> 63#include <stdlib.h>
63#include <sys/prctl.h> 64#include <sys/prctl.h>
@@ -101,8 +102,6 @@ static struct target target = {
101 102
102static int run_count = 1; 103static int run_count = 1;
103static bool no_inherit = false; 104static bool no_inherit = false;
104static bool scale = true;
105static enum aggr_mode aggr_mode = AGGR_GLOBAL;
106static volatile pid_t child_pid = -1; 105static volatile pid_t child_pid = -1;
107static bool null_run = false; 106static bool null_run = false;
108static int detailed_run = 0; 107static int detailed_run = 0;
@@ -112,11 +111,9 @@ static int big_num_opt = -1;
112static const char *csv_sep = NULL; 111static const char *csv_sep = NULL;
113static bool csv_output = false; 112static bool csv_output = false;
114static bool group = false; 113static bool group = false;
115static FILE *output = NULL;
116static const char *pre_cmd = NULL; 114static const char *pre_cmd = NULL;
117static const char *post_cmd = NULL; 115static const char *post_cmd = NULL;
118static bool sync_run = false; 116static bool sync_run = false;
119static unsigned int interval = 0;
120static unsigned int initial_delay = 0; 117static unsigned int initial_delay = 0;
121static unsigned int unit_width = 4; /* strlen("unit") */ 118static unsigned int unit_width = 4; /* strlen("unit") */
122static bool forever = false; 119static bool forever = false;
@@ -126,6 +123,11 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu);
126 123
127static volatile int done = 0; 124static volatile int done = 0;
128 125
126static struct perf_stat_config stat_config = {
127 .aggr_mode = AGGR_GLOBAL,
128 .scale = true,
129};
130
129static inline void diff_timespec(struct timespec *r, struct timespec *a, 131static inline void diff_timespec(struct timespec *r, struct timespec *a,
130 struct timespec *b) 132 struct timespec *b)
131{ 133{
@@ -148,7 +150,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
148{ 150{
149 struct perf_event_attr *attr = &evsel->attr; 151 struct perf_event_attr *attr = &evsel->attr;
150 152
151 if (scale) 153 if (stat_config.scale)
152 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 154 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
153 PERF_FORMAT_TOTAL_TIME_RUNNING; 155 PERF_FORMAT_TOTAL_TIME_RUNNING;
154 156
@@ -178,142 +180,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
178 return 0; 180 return 0;
179} 181}
180 182
181static void zero_per_pkg(struct perf_evsel *counter)
182{
183 if (counter->per_pkg_mask)
184 memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
185}
186
187static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
188{
189 unsigned long *mask = counter->per_pkg_mask;
190 struct cpu_map *cpus = perf_evsel__cpus(counter);
191 int s;
192
193 *skip = false;
194
195 if (!counter->per_pkg)
196 return 0;
197
198 if (cpu_map__empty(cpus))
199 return 0;
200
201 if (!mask) {
202 mask = zalloc(MAX_NR_CPUS);
203 if (!mask)
204 return -ENOMEM;
205
206 counter->per_pkg_mask = mask;
207 }
208
209 s = cpu_map__get_socket(cpus, cpu);
210 if (s < 0)
211 return -1;
212
213 *skip = test_and_set_bit(s, mask) == 1;
214 return 0;
215}
216
217static int
218process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
219 struct perf_counts_values *count)
220{
221 struct perf_counts_values *aggr = &evsel->counts->aggr;
222 static struct perf_counts_values zero;
223 bool skip = false;
224
225 if (check_per_pkg(evsel, cpu, &skip)) {
226 pr_err("failed to read per-pkg counter\n");
227 return -1;
228 }
229
230 if (skip)
231 count = &zero;
232
233 switch (aggr_mode) {
234 case AGGR_THREAD:
235 case AGGR_CORE:
236 case AGGR_SOCKET:
237 case AGGR_NONE:
238 if (!evsel->snapshot)
239 perf_evsel__compute_deltas(evsel, cpu, thread, count);
240 perf_counts_values__scale(count, scale, NULL);
241 if (aggr_mode == AGGR_NONE)
242 perf_stat__update_shadow_stats(evsel, count->values, cpu);
243 break;
244 case AGGR_GLOBAL:
245 aggr->val += count->val;
246 if (scale) {
247 aggr->ena += count->ena;
248 aggr->run += count->run;
249 }
250 default:
251 break;
252 }
253
254 return 0;
255}
256
257static int process_counter_maps(struct perf_evsel *counter)
258{
259 int nthreads = thread_map__nr(counter->threads);
260 int ncpus = perf_evsel__nr_cpus(counter);
261 int cpu, thread;
262
263 if (counter->system_wide)
264 nthreads = 1;
265
266 for (thread = 0; thread < nthreads; thread++) {
267 for (cpu = 0; cpu < ncpus; cpu++) {
268 if (process_counter_values(counter, cpu, thread,
269 perf_counts(counter->counts, cpu, thread)))
270 return -1;
271 }
272 }
273
274 return 0;
275}
276
277static int process_counter(struct perf_evsel *counter)
278{
279 struct perf_counts_values *aggr = &counter->counts->aggr;
280 struct perf_stat *ps = counter->priv;
281 u64 *count = counter->counts->aggr.values;
282 int i, ret;
283
284 aggr->val = aggr->ena = aggr->run = 0;
285 init_stats(ps->res_stats);
286
287 if (counter->per_pkg)
288 zero_per_pkg(counter);
289
290 ret = process_counter_maps(counter);
291 if (ret)
292 return ret;
293
294 if (aggr_mode != AGGR_GLOBAL)
295 return 0;
296
297 if (!counter->snapshot)
298 perf_evsel__compute_deltas(counter, -1, -1, aggr);
299 perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
300
301 for (i = 0; i < 3; i++)
302 update_stats(&ps->res_stats[i], count[i]);
303
304 if (verbose) {
305 fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
306 perf_evsel__name(counter), count[0], count[1], count[2]);
307 }
308
309 /*
310 * Save the full runtime - to allow normalization during printout:
311 */
312 perf_stat__update_shadow_stats(counter, count, 0);
313
314 return 0;
315}
316
317/* 183/*
318 * Read out the results of a single counter: 184 * Read out the results of a single counter:
319 * do not aggregate counts across CPUs in system-wide mode 185 * do not aggregate counts across CPUs in system-wide mode
@@ -351,7 +217,7 @@ static void read_counters(bool close_counters)
351 if (read_counter(counter)) 217 if (read_counter(counter))
352 pr_warning("failed to read counter %s\n", counter->name); 218 pr_warning("failed to read counter %s\n", counter->name);
353 219
354 if (process_counter(counter)) 220 if (perf_stat_process_counter(&stat_config, counter))
355 pr_warning("failed to process counter %s\n", counter->name); 221 pr_warning("failed to process counter %s\n", counter->name);
356 222
357 if (close_counters) { 223 if (close_counters) {
@@ -402,6 +268,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
402 268
403static int __run_perf_stat(int argc, const char **argv) 269static int __run_perf_stat(int argc, const char **argv)
404{ 270{
271 int interval = stat_config.interval;
405 char msg[512]; 272 char msg[512];
406 unsigned long long t0, t1; 273 unsigned long long t0, t1;
407 struct perf_evsel *counter; 274 struct perf_evsel *counter;
@@ -545,13 +412,13 @@ static int run_perf_stat(int argc, const char **argv)
545static void print_running(u64 run, u64 ena) 412static void print_running(u64 run, u64 ena)
546{ 413{
547 if (csv_output) { 414 if (csv_output) {
548 fprintf(output, "%s%" PRIu64 "%s%.2f", 415 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
549 csv_sep, 416 csv_sep,
550 run, 417 run,
551 csv_sep, 418 csv_sep,
552 ena ? 100.0 * run / ena : 100.0); 419 ena ? 100.0 * run / ena : 100.0);
553 } else if (run != ena) { 420 } else if (run != ena) {
554 fprintf(output, " (%.2f%%)", 100.0 * run / ena); 421 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
555 } 422 }
556} 423}
557 424
@@ -560,9 +427,9 @@ static void print_noise_pct(double total, double avg)
560 double pct = rel_stddev_stats(total, avg); 427 double pct = rel_stddev_stats(total, avg);
561 428
562 if (csv_output) 429 if (csv_output)
563 fprintf(output, "%s%.2f%%", csv_sep, pct); 430 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
564 else if (pct) 431 else if (pct)
565 fprintf(output, " ( +-%6.2f%% )", pct); 432 fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
566} 433}
567 434
568static void print_noise(struct perf_evsel *evsel, double avg) 435static void print_noise(struct perf_evsel *evsel, double avg)
@@ -578,9 +445,9 @@ static void print_noise(struct perf_evsel *evsel, double avg)
578 445
579static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 446static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
580{ 447{
581 switch (aggr_mode) { 448 switch (stat_config.aggr_mode) {
582 case AGGR_CORE: 449 case AGGR_CORE:
583 fprintf(output, "S%d-C%*d%s%*d%s", 450 fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
584 cpu_map__id_to_socket(id), 451 cpu_map__id_to_socket(id),
585 csv_output ? 0 : -8, 452 csv_output ? 0 : -8,
586 cpu_map__id_to_cpu(id), 453 cpu_map__id_to_cpu(id),
@@ -590,7 +457,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
590 csv_sep); 457 csv_sep);
591 break; 458 break;
592 case AGGR_SOCKET: 459 case AGGR_SOCKET:
593 fprintf(output, "S%*d%s%*d%s", 460 fprintf(stat_config.output, "S%*d%s%*d%s",
594 csv_output ? 0 : -5, 461 csv_output ? 0 : -5,
595 id, 462 id,
596 csv_sep, 463 csv_sep,
@@ -599,12 +466,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
599 csv_sep); 466 csv_sep);
600 break; 467 break;
601 case AGGR_NONE: 468 case AGGR_NONE:
602 fprintf(output, "CPU%*d%s", 469 fprintf(stat_config.output, "CPU%*d%s",
603 csv_output ? 0 : -4, 470 csv_output ? 0 : -4,
604 perf_evsel__cpus(evsel)->map[id], csv_sep); 471 perf_evsel__cpus(evsel)->map[id], csv_sep);
605 break; 472 break;
606 case AGGR_THREAD: 473 case AGGR_THREAD:
607 fprintf(output, "%*s-%*d%s", 474 fprintf(stat_config.output, "%*s-%*d%s",
608 csv_output ? 0 : 16, 475 csv_output ? 0 : 16,
609 thread_map__comm(evsel->threads, id), 476 thread_map__comm(evsel->threads, id),
610 csv_output ? 0 : -8, 477 csv_output ? 0 : -8,
@@ -619,6 +486,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
619 486
620static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 487static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
621{ 488{
489 FILE *output = stat_config.output;
622 double msecs = avg / 1e6; 490 double msecs = avg / 1e6;
623 const char *fmt_v, *fmt_n; 491 const char *fmt_v, *fmt_n;
624 char name[25]; 492 char name[25];
@@ -643,7 +511,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
643 if (evsel->cgrp) 511 if (evsel->cgrp)
644 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 512 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
645 513
646 if (csv_output || interval) 514 if (csv_output || stat_config.interval)
647 return; 515 return;
648 516
649 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 517 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -655,6 +523,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
655 523
656static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 524static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
657{ 525{
526 FILE *output = stat_config.output;
658 double sc = evsel->scale; 527 double sc = evsel->scale;
659 const char *fmt; 528 const char *fmt;
660 int cpu = cpu_map__id_to_cpu(id); 529 int cpu = cpu_map__id_to_cpu(id);
@@ -670,7 +539,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
670 539
671 aggr_printout(evsel, id, nr); 540 aggr_printout(evsel, id, nr);
672 541
673 if (aggr_mode == AGGR_GLOBAL) 542 if (stat_config.aggr_mode == AGGR_GLOBAL)
674 cpu = 0; 543 cpu = 0;
675 544
676 fprintf(output, fmt, avg, csv_sep); 545 fprintf(output, fmt, avg, csv_sep);
@@ -685,16 +554,18 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
685 if (evsel->cgrp) 554 if (evsel->cgrp)
686 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 555 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
687 556
688 if (csv_output || interval) 557 if (csv_output || stat_config.interval)
689 return; 558 return;
690 559
691 perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); 560 perf_stat__print_shadow_stats(output, evsel, avg, cpu,
561 stat_config.aggr_mode);
692} 562}
693 563
694static void print_aggr(char *prefix) 564static void print_aggr(char *prefix)
695{ 565{
566 FILE *output = stat_config.output;
696 struct perf_evsel *counter; 567 struct perf_evsel *counter;
697 int cpu, cpu2, s, s2, id, nr; 568 int cpu, s, s2, id, nr;
698 double uval; 569 double uval;
699 u64 ena, run, val; 570 u64 ena, run, val;
700 571
@@ -707,8 +578,7 @@ static void print_aggr(char *prefix)
707 val = ena = run = 0; 578 val = ena = run = 0;
708 nr = 0; 579 nr = 0;
709 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 580 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
710 cpu2 = perf_evsel__cpus(counter)->map[cpu]; 581 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
711 s2 = aggr_get_id(evsel_list->cpus, cpu2);
712 if (s2 != id) 582 if (s2 != id)
713 continue; 583 continue;
714 val += perf_counts(counter->counts, cpu, 0)->val; 584 val += perf_counts(counter->counts, cpu, 0)->val;
@@ -761,6 +631,7 @@ static void print_aggr(char *prefix)
761 631
762static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 632static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
763{ 633{
634 FILE *output = stat_config.output;
764 int nthreads = thread_map__nr(counter->threads); 635 int nthreads = thread_map__nr(counter->threads);
765 int ncpus = cpu_map__nr(counter->cpus); 636 int ncpus = cpu_map__nr(counter->cpus);
766 int cpu, thread; 637 int cpu, thread;
@@ -799,6 +670,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
799 */ 670 */
800static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 671static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
801{ 672{
673 FILE *output = stat_config.output;
802 struct perf_stat *ps = counter->priv; 674 struct perf_stat *ps = counter->priv;
803 double avg = avg_stats(&ps->res_stats[0]); 675 double avg = avg_stats(&ps->res_stats[0]);
804 int scaled = counter->counts->scaled; 676 int scaled = counter->counts->scaled;
@@ -850,6 +722,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
850 */ 722 */
851static void print_counter(struct perf_evsel *counter, char *prefix) 723static void print_counter(struct perf_evsel *counter, char *prefix)
852{ 724{
725 FILE *output = stat_config.output;
853 u64 ena, run, val; 726 u64 ena, run, val;
854 double uval; 727 double uval;
855 int cpu; 728 int cpu;
@@ -904,12 +777,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
904 777
905static void print_interval(char *prefix, struct timespec *ts) 778static void print_interval(char *prefix, struct timespec *ts)
906{ 779{
780 FILE *output = stat_config.output;
907 static int num_print_interval; 781 static int num_print_interval;
908 782
909 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 783 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
910 784
911 if (num_print_interval == 0 && !csv_output) { 785 if (num_print_interval == 0 && !csv_output) {
912 switch (aggr_mode) { 786 switch (stat_config.aggr_mode) {
913 case AGGR_SOCKET: 787 case AGGR_SOCKET:
914 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); 788 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
915 break; 789 break;
@@ -934,6 +808,7 @@ static void print_interval(char *prefix, struct timespec *ts)
934 808
935static void print_header(int argc, const char **argv) 809static void print_header(int argc, const char **argv)
936{ 810{
811 FILE *output = stat_config.output;
937 int i; 812 int i;
938 813
939 fflush(stdout); 814 fflush(stdout);
@@ -963,6 +838,8 @@ static void print_header(int argc, const char **argv)
963 838
964static void print_footer(void) 839static void print_footer(void)
965{ 840{
841 FILE *output = stat_config.output;
842
966 if (!null_run) 843 if (!null_run)
967 fprintf(output, "\n"); 844 fprintf(output, "\n");
968 fprintf(output, " %17.9f seconds time elapsed", 845 fprintf(output, " %17.9f seconds time elapsed",
@@ -977,6 +854,7 @@ static void print_footer(void)
977 854
978static void print_counters(struct timespec *ts, int argc, const char **argv) 855static void print_counters(struct timespec *ts, int argc, const char **argv)
979{ 856{
857 int interval = stat_config.interval;
980 struct perf_evsel *counter; 858 struct perf_evsel *counter;
981 char buf[64], *prefix = NULL; 859 char buf[64], *prefix = NULL;
982 860
@@ -985,7 +863,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
985 else 863 else
986 print_header(argc, argv); 864 print_header(argc, argv);
987 865
988 switch (aggr_mode) { 866 switch (stat_config.aggr_mode) {
989 case AGGR_CORE: 867 case AGGR_CORE:
990 case AGGR_SOCKET: 868 case AGGR_SOCKET:
991 print_aggr(prefix); 869 print_aggr(prefix);
@@ -1009,14 +887,14 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
1009 if (!interval && !csv_output) 887 if (!interval && !csv_output)
1010 print_footer(); 888 print_footer();
1011 889
1012 fflush(output); 890 fflush(stat_config.output);
1013} 891}
1014 892
1015static volatile int signr = -1; 893static volatile int signr = -1;
1016 894
1017static void skip_signal(int signo) 895static void skip_signal(int signo)
1018{ 896{
1019 if ((child_pid == -1) || interval) 897 if ((child_pid == -1) || stat_config.interval)
1020 done = 1; 898 done = 1;
1021 899
1022 signr = signo; 900 signr = signo;
@@ -1064,7 +942,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
1064 942
1065static int perf_stat_init_aggr_mode(void) 943static int perf_stat_init_aggr_mode(void)
1066{ 944{
1067 switch (aggr_mode) { 945 switch (stat_config.aggr_mode) {
1068 case AGGR_SOCKET: 946 case AGGR_SOCKET:
1069 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 947 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1070 perror("cannot build socket map"); 948 perror("cannot build socket map");
@@ -1270,7 +1148,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1270 "system-wide collection from all CPUs"), 1148 "system-wide collection from all CPUs"),
1271 OPT_BOOLEAN('g', "group", &group, 1149 OPT_BOOLEAN('g', "group", &group,
1272 "put the counters into a counter group"), 1150 "put the counters into a counter group"),
1273 OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), 1151 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
1274 OPT_INCR('v', "verbose", &verbose, 1152 OPT_INCR('v', "verbose", &verbose,
1275 "be more verbose (show counter open errors, etc)"), 1153 "be more verbose (show counter open errors, etc)"),
1276 OPT_INTEGER('r', "repeat", &run_count, 1154 OPT_INTEGER('r', "repeat", &run_count,
@@ -1286,7 +1164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1286 stat__set_big_num), 1164 stat__set_big_num),
1287 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1165 OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1288 "list of cpus to monitor in system-wide"), 1166 "list of cpus to monitor in system-wide"),
1289 OPT_SET_UINT('A', "no-aggr", &aggr_mode, 1167 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
1290 "disable CPU count aggregation", AGGR_NONE), 1168 "disable CPU count aggregation", AGGR_NONE),
1291 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1169 OPT_STRING('x', "field-separator", &csv_sep, "separator",
1292 "print counts with custom separator"), 1170 "print counts with custom separator"),
@@ -1300,13 +1178,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1300 "command to run prior to the measured command"), 1178 "command to run prior to the measured command"),
1301 OPT_STRING(0, "post", &post_cmd, "command", 1179 OPT_STRING(0, "post", &post_cmd, "command",
1302 "command to run after to the measured command"), 1180 "command to run after to the measured command"),
1303 OPT_UINTEGER('I', "interval-print", &interval, 1181 OPT_UINTEGER('I', "interval-print", &stat_config.interval,
1304 "print counts at regular interval in ms (>= 100)"), 1182 "print counts at regular interval in ms (>= 100)"),
1305 OPT_SET_UINT(0, "per-socket", &aggr_mode, 1183 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
1306 "aggregate counts per processor socket", AGGR_SOCKET), 1184 "aggregate counts per processor socket", AGGR_SOCKET),
1307 OPT_SET_UINT(0, "per-core", &aggr_mode, 1185 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
1308 "aggregate counts per physical processor core", AGGR_CORE), 1186 "aggregate counts per physical processor core", AGGR_CORE),
1309 OPT_SET_UINT(0, "per-thread", &aggr_mode, 1187 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
1310 "aggregate counts per thread", AGGR_THREAD), 1188 "aggregate counts per thread", AGGR_THREAD),
1311 OPT_UINTEGER('D', "delay", &initial_delay, 1189 OPT_UINTEGER('D', "delay", &initial_delay,
1312 "ms to wait before starting measurement after program start"), 1190 "ms to wait before starting measurement after program start"),
@@ -1318,6 +1196,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1318 }; 1196 };
1319 int status = -EINVAL, run_idx; 1197 int status = -EINVAL, run_idx;
1320 const char *mode; 1198 const char *mode;
1199 FILE *output = stderr;
1200 unsigned int interval;
1321 1201
1322 setlocale(LC_ALL, ""); 1202 setlocale(LC_ALL, "");
1323 1203
@@ -1328,7 +1208,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1328 argc = parse_options(argc, argv, options, stat_usage, 1208 argc = parse_options(argc, argv, options, stat_usage,
1329 PARSE_OPT_STOP_AT_NON_OPTION); 1209 PARSE_OPT_STOP_AT_NON_OPTION);
1330 1210
1331 output = stderr; 1211 interval = stat_config.interval;
1212
1332 if (output_name && strcmp(output_name, "-")) 1213 if (output_name && strcmp(output_name, "-"))
1333 output = NULL; 1214 output = NULL;
1334 1215
@@ -1365,6 +1246,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1365 } 1246 }
1366 } 1247 }
1367 1248
1249 stat_config.output = output;
1250
1368 if (csv_sep) { 1251 if (csv_sep) {
1369 csv_output = true; 1252 csv_output = true;
1370 if (!strcmp(csv_sep, "\\t")) 1253 if (!strcmp(csv_sep, "\\t"))
@@ -1399,7 +1282,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1399 run_count = 1; 1282 run_count = 1;
1400 } 1283 }
1401 1284
1402 if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 1285 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
1403 fprintf(stderr, "The --per-thread option is only available " 1286 fprintf(stderr, "The --per-thread option is only available "
1404 "when monitoring via -p -t options.\n"); 1287 "when monitoring via -p -t options.\n");
1405 parse_options_usage(NULL, options, "p", 1); 1288 parse_options_usage(NULL, options, "p", 1);
@@ -1411,7 +1294,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1411 * no_aggr, cgroup are for system-wide only 1294 * no_aggr, cgroup are for system-wide only
1412 * --per-thread is aggregated per thread, we dont mix it with cpu mode 1295 * --per-thread is aggregated per thread, we dont mix it with cpu mode
1413 */ 1296 */
1414 if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && 1297 if (((stat_config.aggr_mode != AGGR_GLOBAL &&
1298 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
1415 !target__has_cpu(&target)) { 1299 !target__has_cpu(&target)) {
1416 fprintf(stderr, "both cgroup and no-aggregation " 1300 fprintf(stderr, "both cgroup and no-aggregation "
1417 "modes only available in system-wide mode\n"); 1301 "modes only available in system-wide mode\n");
@@ -1444,7 +1328,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1444 * Initialize thread_map with comm names, 1328 * Initialize thread_map with comm names,
1445 * so we could print it out on output. 1329 * so we could print it out on output.
1446 */ 1330 */
1447 if (aggr_mode == AGGR_THREAD) 1331 if (stat_config.aggr_mode == AGGR_THREAD)
1448 thread_map__read_comms(evsel_list->threads); 1332 thread_map__read_comms(evsel_list->threads);
1449 1333
1450 if (interval && interval < 100) { 1334 if (interval && interval < 100) {
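Most of the builtin-stat.c churn is mechanical: scale, aggr_mode, interval and the output stream stop being file-scope globals and move into one stat_config, so shared code like the extracted perf_stat_process_counter() can take its configuration as an argument instead of reading globals. A minimal sketch of the resulting shape (field types simplified from the patch):

    #include <stdio.h>

    enum aggr_mode { AGGR_NONE, AGGR_GLOBAL, AGGR_SOCKET, AGGR_CORE, AGGR_THREAD };

    struct stat_config_sketch {
            enum aggr_mode aggr_mode;
            unsigned int   interval;    /* ms between printouts, 0 = off */
            int            scale;       /* scale/normalize counters */
            FILE          *output;      /* set before use; perf defaults to stderr */
    };

    static struct stat_config_sketch stat_config = {
            .aggr_mode = AGGR_GLOBAL,
            .scale     = 1,
    };

    /* Helpers can take the config explicitly rather than touching globals. */
    static void print_running(const struct stat_config_sketch *cfg,
                              unsigned long long run, unsigned long long ena)
    {
            if (run != ena)
                    fprintf(cfg->output, " (%.2f%%)", 100.0 * run / ena);
    }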
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6135cc07213c..8c465c83aabf 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -40,6 +40,7 @@
40#include "util/xyarray.h" 40#include "util/xyarray.h"
41#include "util/sort.h" 41#include "util/sort.h"
42#include "util/intlist.h" 42#include "util/intlist.h"
43#include "util/parse-branch-options.h"
43#include "arch/common.h" 44#include "arch/common.h"
44 45
45#include "util/debug.h" 46#include "util/debug.h"
@@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
695 perf_top__record_precise_ip(top, he, evsel->idx, ip); 696 perf_top__record_precise_ip(top, he, evsel->idx, ip);
696 } 697 }
697 698
699 hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
700 !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
698 return 0; 701 return 0;
699} 702}
700 703
@@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1171 "don't try to adjust column width, use these fixed values"), 1174 "don't try to adjust column width, use these fixed values"),
1172 OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, 1175 OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
1173 "per thread proc mmap processing timeout in ms"), 1176 "per thread proc mmap processing timeout in ms"),
1177 OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
1178 "branch any", "sample any taken branches",
1179 parse_branch_stack),
1180 OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
1181 "branch filter mask", "branch stack filter modes",
1182 parse_branch_stack),
1174 OPT_END() 1183 OPT_END()
1175 }; 1184 };
1176 const char * const top_usage[] = { 1185 const char * const top_usage[] = {
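perf top gains the same -b/-j branch options as perf record, both funneling into parse_branch_stack; underneath, a request for "any taken branch" sampling reduces to two perf_event_attr fields. A sketch at the attr level (assuming a single event and user-space-only filtering):

    #include <linux/perf_event.h>

    static void request_any_branches(struct perf_event_attr *attr)
    {
            attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
            attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                       PERF_SAMPLE_BRANCH_USER;
    }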
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 39ad4d0ca884..4e3abba03062 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1,8 +1,27 @@
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
1#include <traceevent/event-parse.h> 19#include <traceevent/event-parse.h>
2#include "builtin.h" 20#include "builtin.h"
3#include "util/color.h" 21#include "util/color.h"
4#include "util/debug.h" 22#include "util/debug.h"
5#include "util/evlist.h" 23#include "util/evlist.h"
24#include "util/exec_cmd.h"
6#include "util/machine.h" 25#include "util/machine.h"
7#include "util/session.h" 26#include "util/session.h"
8#include "util/thread.h" 27#include "util/thread.h"
@@ -26,6 +45,7 @@
26 45
27#ifndef MADV_HWPOISON 46#ifndef MADV_HWPOISON
28# define MADV_HWPOISON 100 47# define MADV_HWPOISON 100
48
29#endif 49#endif
30 50
31#ifndef MADV_MERGEABLE 51#ifndef MADV_MERGEABLE
@@ -247,42 +267,6 @@ out_delete:
247 ({ struct syscall_tp *fields = evsel->priv; \ 267 ({ struct syscall_tp *fields = evsel->priv; \
248 fields->name.pointer(&fields->name, sample); }) 268 fields->name.pointer(&fields->name, sample); })
249 269
250static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
251 void *sys_enter_handler,
252 void *sys_exit_handler)
253{
254 int ret = -1;
255 struct perf_evsel *sys_enter, *sys_exit;
256
257 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
258 if (sys_enter == NULL)
259 goto out;
260
261 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
262 goto out_delete_sys_enter;
263
264 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
265 if (sys_exit == NULL)
266 goto out_delete_sys_enter;
267
268 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
269 goto out_delete_sys_exit;
270
271 perf_evlist__add(evlist, sys_enter);
272 perf_evlist__add(evlist, sys_exit);
273
274 ret = 0;
275out:
276 return ret;
277
278out_delete_sys_exit:
279 perf_evsel__delete_priv(sys_exit);
280out_delete_sys_enter:
281 perf_evsel__delete_priv(sys_enter);
282 goto out;
283}
284
285
286struct syscall_arg { 270struct syscall_arg {
287 unsigned long val; 271 unsigned long val;
288 struct thread *thread; 272 struct thread *thread;
@@ -604,6 +588,15 @@ static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
604static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 588static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
605static DEFINE_STRARRAY(itimers); 589static DEFINE_STRARRAY(itimers);
606 590
591static const char *keyctl_options[] = {
592 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
593 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
594 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
595 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
596 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
597};
598static DEFINE_STRARRAY(keyctl_options);
599
607static const char *whences[] = { "SET", "CUR", "END", 600static const char *whences[] = { "SET", "CUR", "END",
608#ifdef SEEK_DATA 601#ifdef SEEK_DATA
609"DATA", 602"DATA",
@@ -634,7 +627,8 @@ static DEFINE_STRARRAY(sighow);
634 627
635static const char *clockid[] = { 628static const char *clockid[] = {
636 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 629 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
637 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", 630 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
631 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
638}; 632};
639static DEFINE_STRARRAY(clockid); 633static DEFINE_STRARRAY(clockid);
640 634
@@ -779,6 +773,11 @@ static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
779 773
780#define SCA_ACCMODE syscall_arg__scnprintf_access_mode 774#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
781 775
776static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
777 struct syscall_arg *arg);
778
779#define SCA_FILENAME syscall_arg__scnprintf_filename
780
782static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, 781static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
783 struct syscall_arg *arg) 782 struct syscall_arg *arg)
784{ 783{
@@ -1006,14 +1005,23 @@ static struct syscall_fmt {
1006 bool hexret; 1005 bool hexret;
1007} syscall_fmts[] = { 1006} syscall_fmts[] = {
1008 { .name = "access", .errmsg = true, 1007 { .name = "access", .errmsg = true,
1009 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, 1008 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1009 [1] = SCA_ACCMODE, /* mode */ }, },
1010 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 1010 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1011 { .name = "brk", .hexret = true, 1011 { .name = "brk", .hexret = true,
1012 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 1012 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1013 { .name = "chdir", .errmsg = true,
1014 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1015 { .name = "chmod", .errmsg = true,
1016 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1017 { .name = "chroot", .errmsg = true,
1018 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1013 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, 1019 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1014 { .name = "close", .errmsg = true, 1020 { .name = "close", .errmsg = true,
1015 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 1021 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1016 { .name = "connect", .errmsg = true, }, 1022 { .name = "connect", .errmsg = true, },
1023 { .name = "creat", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1017 { .name = "dup", .errmsg = true, 1025 { .name = "dup", .errmsg = true,
1018 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1026 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1019 { .name = "dup2", .errmsg = true, 1027 { .name = "dup2", .errmsg = true,
@@ -1024,7 +1032,8 @@ static struct syscall_fmt {
1024 { .name = "eventfd2", .errmsg = true, 1032 { .name = "eventfd2", .errmsg = true,
1025 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, 1033 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1026 { .name = "faccessat", .errmsg = true, 1034 { .name = "faccessat", .errmsg = true,
1027 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1035 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1036 [1] = SCA_FILENAME, /* filename */ }, },
1028 { .name = "fadvise64", .errmsg = true, 1037 { .name = "fadvise64", .errmsg = true,
1029 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030 { .name = "fallocate", .errmsg = true, 1039 { .name = "fallocate", .errmsg = true,
@@ -1034,11 +1043,13 @@ static struct syscall_fmt {
1034 { .name = "fchmod", .errmsg = true, 1043 { .name = "fchmod", .errmsg = true,
1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1044 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036 { .name = "fchmodat", .errmsg = true, 1045 { .name = "fchmodat", .errmsg = true,
1037 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1046 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1047 [1] = SCA_FILENAME, /* filename */ }, },
1038 { .name = "fchown", .errmsg = true, 1048 { .name = "fchown", .errmsg = true,
1039 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1049 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1040 { .name = "fchownat", .errmsg = true, 1050 { .name = "fchownat", .errmsg = true,
1041 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1051 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1052 [1] = SCA_FILENAME, /* filename */ }, },
1042 { .name = "fcntl", .errmsg = true, 1053 { .name = "fcntl", .errmsg = true,
1043 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1054 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1044 [1] = SCA_STRARRAY, /* cmd */ }, 1055 [1] = SCA_STRARRAY, /* cmd */ },
@@ -1053,7 +1064,8 @@ static struct syscall_fmt {
1053 { .name = "fstat", .errmsg = true, .alias = "newfstat", 1064 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1054 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1065 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", 1066 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1056 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1067 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1068 [1] = SCA_FILENAME, /* filename */ }, },
1057 { .name = "fstatfs", .errmsg = true, 1069 { .name = "fstatfs", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1070 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059 { .name = "fsync", .errmsg = true, 1071 { .name = "fsync", .errmsg = true,
@@ -1063,13 +1075,18 @@ static struct syscall_fmt {
1063 { .name = "futex", .errmsg = true, 1075 { .name = "futex", .errmsg = true,
1064 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 1076 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1065 { .name = "futimesat", .errmsg = true, 1077 { .name = "futimesat", .errmsg = true,
1066 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1078 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1079 [1] = SCA_FILENAME, /* filename */ }, },
1067 { .name = "getdents", .errmsg = true, 1080 { .name = "getdents", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1081 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 { .name = "getdents64", .errmsg = true, 1082 { .name = "getdents64", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1083 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1071 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1084 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1072 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1085 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1086 { .name = "getxattr", .errmsg = true,
1087 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1088 { .name = "inotify_add_watch", .errmsg = true,
1089 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1073 { .name = "ioctl", .errmsg = true, 1090 { .name = "ioctl", .errmsg = true,
1074 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1091 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1075#if defined(__i386__) || defined(__x86_64__) 1092#if defined(__i386__) || defined(__x86_64__)
@@ -1082,22 +1099,44 @@ static struct syscall_fmt {
1082#else 1099#else
1083 [2] = SCA_HEX, /* arg */ }, }, 1100 [2] = SCA_HEX, /* arg */ }, },
1084#endif 1101#endif
1102 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1085 { .name = "kill", .errmsg = true, 1103 { .name = "kill", .errmsg = true,
1086 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1104 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1105 { .name = "lchown", .errmsg = true,
1106 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1107 { .name = "lgetxattr", .errmsg = true,
1108 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1087 { .name = "linkat", .errmsg = true, 1109 { .name = "linkat", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1110 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1111 { .name = "listxattr", .errmsg = true,
1112 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1113 { .name = "llistxattr", .errmsg = true,
1114 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1115 { .name = "lremovexattr", .errmsg = true,
1116 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1089 { .name = "lseek", .errmsg = true, 1117 { .name = "lseek", .errmsg = true,
1090 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1118 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1091 [2] = SCA_STRARRAY, /* whence */ }, 1119 [2] = SCA_STRARRAY, /* whence */ },
1092 .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, 1120 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1093 { .name = "lstat", .errmsg = true, .alias = "newlstat", }, 1121 { .name = "lsetxattr", .errmsg = true,
1122 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1123 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1124 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1125 { .name = "lsxattr", .errmsg = true,
1126 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1094 { .name = "madvise", .errmsg = true, 1127 { .name = "madvise", .errmsg = true,
1095 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 1128 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1096 [2] = SCA_MADV_BHV, /* behavior */ }, }, 1129 [2] = SCA_MADV_BHV, /* behavior */ }, },
1130 { .name = "mkdir", .errmsg = true,
1131 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1097 { .name = "mkdirat", .errmsg = true, 1132 { .name = "mkdirat", .errmsg = true,
1098 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1133 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1134 [1] = SCA_FILENAME, /* pathname */ }, },
1135 { .name = "mknod", .errmsg = true,
1136 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1099 { .name = "mknodat", .errmsg = true, 1137 { .name = "mknodat", .errmsg = true,
1100 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1138 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1139 [1] = SCA_FILENAME, /* filename */ }, },
1101 { .name = "mlock", .errmsg = true, 1140 { .name = "mlock", .errmsg = true,
1102 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1141 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1103 { .name = "mlockall", .errmsg = true, 1142 { .name = "mlockall", .errmsg = true,
@@ -1110,6 +1149,8 @@ static struct syscall_fmt {
1110 { .name = "mprotect", .errmsg = true, 1149 { .name = "mprotect", .errmsg = true,
1111 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 1150 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1112 [2] = SCA_MMAP_PROT, /* prot */ }, }, 1151 [2] = SCA_MMAP_PROT, /* prot */ }, },
1152 { .name = "mq_unlink", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1113 { .name = "mremap", .hexret = true, 1154 { .name = "mremap", .hexret = true,
1114 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 1155 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1115 [3] = SCA_MREMAP_FLAGS, /* flags */ 1156 [3] = SCA_MREMAP_FLAGS, /* flags */
@@ -1121,14 +1162,17 @@ static struct syscall_fmt {
1121 { .name = "name_to_handle_at", .errmsg = true, 1162 { .name = "name_to_handle_at", .errmsg = true,
1122 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1163 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1123 { .name = "newfstatat", .errmsg = true, 1164 { .name = "newfstatat", .errmsg = true,
1124 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1165 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1166 [1] = SCA_FILENAME, /* filename */ }, },
1125 { .name = "open", .errmsg = true, 1167 { .name = "open", .errmsg = true,
1126 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 1168 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1169 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1127 { .name = "open_by_handle_at", .errmsg = true, 1170 { .name = "open_by_handle_at", .errmsg = true,
1128 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1171 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1129 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1172 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1130 { .name = "openat", .errmsg = true, 1173 { .name = "openat", .errmsg = true,
1131 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1174 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1175 [1] = SCA_FILENAME, /* filename */
1132 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1176 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1133 { .name = "perf_event_open", .errmsg = true, 1177 { .name = "perf_event_open", .errmsg = true,
1134 .arg_scnprintf = { [1] = SCA_INT, /* pid */ 1178 .arg_scnprintf = { [1] = SCA_INT, /* pid */
@@ -1150,18 +1194,28 @@ static struct syscall_fmt {
1150 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1194 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1151 { .name = "read", .errmsg = true, 1195 { .name = "read", .errmsg = true,
1152 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1196 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1197 { .name = "readlink", .errmsg = true,
1198 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1153 { .name = "readlinkat", .errmsg = true, 1199 { .name = "readlinkat", .errmsg = true,
1154 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1200 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1201 [1] = SCA_FILENAME, /* pathname */ }, },
1155 { .name = "readv", .errmsg = true, 1202 { .name = "readv", .errmsg = true,
1156 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1203 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1157 { .name = "recvfrom", .errmsg = true, 1204 { .name = "recvfrom", .errmsg = true,
1158 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1205 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1206 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1159 { .name = "recvmmsg", .errmsg = true, 1207 { .name = "recvmmsg", .errmsg = true,
1160 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1208 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1209 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1161 { .name = "recvmsg", .errmsg = true, 1210 { .name = "recvmsg", .errmsg = true,
1162 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1211 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1212 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1213 { .name = "removexattr", .errmsg = true,
1214 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1163 { .name = "renameat", .errmsg = true, 1215 { .name = "renameat", .errmsg = true,
1164 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1216 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1217 { .name = "rmdir", .errmsg = true,
1218 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1165 { .name = "rt_sigaction", .errmsg = true, 1219 { .name = "rt_sigaction", .errmsg = true,
1166 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, 1220 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1167 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, 1221 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
@@ -1171,13 +1225,18 @@ static struct syscall_fmt {
1171 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1225 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1172 { .name = "select", .errmsg = true, .timeout = true, }, 1226 { .name = "select", .errmsg = true, .timeout = true, },
1173 { .name = "sendmmsg", .errmsg = true, 1227 { .name = "sendmmsg", .errmsg = true,
1174 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1228 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1229 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1175 { .name = "sendmsg", .errmsg = true, 1230 { .name = "sendmsg", .errmsg = true,
1176 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1231 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1232 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1177 { .name = "sendto", .errmsg = true, 1233 { .name = "sendto", .errmsg = true,
1178 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1234 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1235 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1179 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1236 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1180 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1237 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1238 { .name = "setxattr", .errmsg = true,
1239 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1181 { .name = "shutdown", .errmsg = true, 1240 { .name = "shutdown", .errmsg = true,
1182 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1241 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1183 { .name = "socket", .errmsg = true, 1242 { .name = "socket", .errmsg = true,
@@ -1188,18 +1247,35 @@ static struct syscall_fmt {
1188 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1247 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1189 [1] = SCA_SK_TYPE, /* type */ }, 1248 [1] = SCA_SK_TYPE, /* type */ },
1190 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1249 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1191 { .name = "stat", .errmsg = true, .alias = "newstat", }, 1250 { .name = "stat", .errmsg = true, .alias = "newstat",
1251 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1252 { .name = "statfs", .errmsg = true,
1253 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1254 { .name = "swapoff", .errmsg = true,
1255 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1256 { .name = "swapon", .errmsg = true,
1257 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1192 { .name = "symlinkat", .errmsg = true, 1258 { .name = "symlinkat", .errmsg = true,
1193 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1259 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1194 { .name = "tgkill", .errmsg = true, 1260 { .name = "tgkill", .errmsg = true,
1195 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1261 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1196 { .name = "tkill", .errmsg = true, 1262 { .name = "tkill", .errmsg = true,
1197 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1263 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1264 { .name = "truncate", .errmsg = true,
1265 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1198 { .name = "uname", .errmsg = true, .alias = "newuname", }, 1266 { .name = "uname", .errmsg = true, .alias = "newuname", },
1199 { .name = "unlinkat", .errmsg = true, 1267 { .name = "unlinkat", .errmsg = true,
1200 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1268 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1269 [1] = SCA_FILENAME, /* pathname */ }, },
1270 { .name = "utime", .errmsg = true,
1271 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1201 { .name = "utimensat", .errmsg = true, 1272 { .name = "utimensat", .errmsg = true,
1202 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, 1273 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1274 [1] = SCA_FILENAME, /* filename */ }, },
1275 { .name = "utimes", .errmsg = true,
1276 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1277 { .name = "vmsplice", .errmsg = true,
1278 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1203 { .name = "write", .errmsg = true, 1279 { .name = "write", .errmsg = true,
1204 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1280 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1205 { .name = "writev", .errmsg = true, 1281 { .name = "writev", .errmsg = true,
@@ -1223,7 +1299,6 @@ struct syscall {
1223 int nr_args; 1299 int nr_args;
1224 struct format_field *args; 1300 struct format_field *args;
1225 const char *name; 1301 const char *name;
1226 bool filtered;
1227 bool is_exit; 1302 bool is_exit;
1228 struct syscall_fmt *fmt; 1303 struct syscall_fmt *fmt;
1229 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 1304 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
@@ -1244,6 +1319,11 @@ static size_t fprintf_duration(unsigned long t, FILE *fp)
1244 return printed + fprintf(fp, "): "); 1319 return printed + fprintf(fp, "): ");
1245} 1320}
1246 1321
1322/**
1323 * filename.ptr: The filename char pointer that will be vfs_getname'd
1324 * filename.entry_str_pos: Where to insert the string translated from
1325 * filename.ptr by the vfs_getname tracepoint/kprobe.
1326 */
1247struct thread_trace { 1327struct thread_trace {
1248 u64 entry_time; 1328 u64 entry_time;
1249 u64 exit_time; 1329 u64 exit_time;
@@ -1252,6 +1332,13 @@ struct thread_trace {
1252 unsigned long pfmaj, pfmin; 1332 unsigned long pfmaj, pfmin;
1253 char *entry_str; 1333 char *entry_str;
1254 double runtime_ms; 1334 double runtime_ms;
1335 struct {
1336 unsigned long ptr;
1337 short int entry_str_pos;
1338 bool pending_open;
1339 unsigned int namelen;
1340 char *name;
1341 } filename;
1255 struct { 1342 struct {
1256 int max; 1343 int max;
1257 char **table; 1344 char **table;
@@ -1298,6 +1385,8 @@ fail:
1298#define TRACE_PFMAJ (1 << 0) 1385#define TRACE_PFMAJ (1 << 0)
1299#define TRACE_PFMIN (1 << 1) 1386#define TRACE_PFMIN (1 << 1)
1300 1387
1388static const size_t trace__entry_str_size = 2048;
1389
1301struct trace { 1390struct trace {
1302 struct perf_tool tool; 1391 struct perf_tool tool;
1303 struct { 1392 struct {
@@ -1307,6 +1396,10 @@ struct trace {
1307 struct { 1396 struct {
1308 int max; 1397 int max;
1309 struct syscall *table; 1398 struct syscall *table;
1399 struct {
1400 struct perf_evsel *sys_enter,
1401 *sys_exit;
1402 } events;
1310 } syscalls; 1403 } syscalls;
1311 struct record_opts opts; 1404 struct record_opts opts;
1312 struct perf_evlist *evlist; 1405 struct perf_evlist *evlist;
@@ -1316,7 +1409,10 @@ struct trace {
1316 FILE *output; 1409 FILE *output;
1317 unsigned long nr_events; 1410 unsigned long nr_events;
1318 struct strlist *ev_qualifier; 1411 struct strlist *ev_qualifier;
1319 const char *last_vfs_getname; 1412 struct {
1413 size_t nr;
1414 int *entries;
1415 } ev_qualifier_ids;
1320 struct intlist *tid_list; 1416 struct intlist *tid_list;
1321 struct intlist *pid_list; 1417 struct intlist *pid_list;
1322 struct { 1418 struct {
@@ -1340,6 +1436,7 @@ struct trace {
1340 bool show_tool_stats; 1436 bool show_tool_stats;
1341 bool trace_syscalls; 1437 bool trace_syscalls;
1342 bool force; 1438 bool force;
1439 bool vfs_getname;
1343 int trace_pgfaults; 1440 int trace_pgfaults;
1344}; 1441};
1345 1442
@@ -1443,6 +1540,27 @@ static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1443 return printed; 1540 return printed;
1444} 1541}
1445 1542
1543static void thread__set_filename_pos(struct thread *thread, const char *bf,
1544 unsigned long ptr)
1545{
1546 struct thread_trace *ttrace = thread__priv(thread);
1547
1548 ttrace->filename.ptr = ptr;
1549 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1550}
1551
1552static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1553 struct syscall_arg *arg)
1554{
1555 unsigned long ptr = arg->val;
1556
1557 if (!arg->trace->vfs_getname)
1558 return scnprintf(bf, size, "%#lx", ptr);
1559
1560 thread__set_filename_pos(arg->thread, bf, ptr);
1561 return 0;
1562}
1563
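syscall_arg__scnprintf_filename() defers the formatting: when probe:vfs_getname is available it emits nothing and records, via thread__set_filename_pos(), the user-space pointer and the offset inside the entry string where the resolved name will later be spliced in. A standalone sketch of that first phase (the names here are illustrative, not the tool's API):

#include <stdio.h>
#include <string.h>

struct pending_name {
	unsigned long ptr;	/* user-space pointer seen at sys_enter */
	int entry_str_pos;	/* offset inside the entry string */
};

int main(void)
{
	char entry_str[2048];
	struct pending_name pending;
	int printed;

	/* Format everything but the filename, remembering where it goes. */
	printed = snprintf(entry_str, sizeof(entry_str), "open(filename: ");
	pending.ptr = 0xdeadbeef;		/* hypothetical arg->val */
	pending.entry_str_pos = printed;	/* == bf - entry_str above */
	snprintf(entry_str + printed, sizeof(entry_str) - printed,
		 ", flags: CLOEXEC)");

	printf("%s @ pos %d, ptr %#lx\n", entry_str,
	       pending.entry_str_pos, pending.ptr);
	return 0;
}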
1446static bool trace__filter_duration(struct trace *trace, double t) 1564static bool trace__filter_duration(struct trace *trace, double t)
1447{ 1565{
1448 return t < (trace->duration_filter * NSEC_PER_MSEC); 1566 return t < (trace->duration_filter * NSEC_PER_MSEC);
@@ -1517,6 +1635,9 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1517 if (trace->host == NULL) 1635 if (trace->host == NULL)
1518 return -ENOMEM; 1636 return -ENOMEM;
1519 1637
1638 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1639 return -errno;
1640
1520 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1641 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1521 evlist->threads, trace__tool_process, false, 1642 evlist->threads, trace__tool_process, false,
1522 trace->opts.proc_map_timeout); 1643 trace->opts.proc_map_timeout);
@@ -1578,19 +1699,6 @@ static int trace__read_syscall_info(struct trace *trace, int id)
1578 sc = trace->syscalls.table + id; 1699 sc = trace->syscalls.table + id;
1579 sc->name = name; 1700 sc->name = name;
1580 1701
1581 if (trace->ev_qualifier) {
1582 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1583
1584 if (!(in ^ trace->not_ev_qualifier)) {
1585 sc->filtered = true;
1586 /*
1587 * No need to do read tracepoint information since this will be
1588 * filtered out.
1589 */
1590 return 0;
1591 }
1592 }
1593
1594 sc->fmt = syscall_fmt__find(sc->name); 1702 sc->fmt = syscall_fmt__find(sc->name);
1595 1703
1596 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1704 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
@@ -1619,13 +1727,27 @@ static int trace__read_syscall_info(struct trace *trace, int id)
1619 1727
1620static int trace__validate_ev_qualifier(struct trace *trace) 1728static int trace__validate_ev_qualifier(struct trace *trace)
1621{ 1729{
1622 int err = 0; 1730 int err = 0, i;
1623 struct str_node *pos; 1731 struct str_node *pos;
1624 1732
1733 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1734 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1735 sizeof(trace->ev_qualifier_ids.entries[0]));
1736
1737 if (trace->ev_qualifier_ids.entries == NULL) {
1738 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1739 trace->output);
1740 err = -EINVAL;
1741 goto out;
1742 }
1743
1744 i = 0;
1745
1625 strlist__for_each(pos, trace->ev_qualifier) { 1746 strlist__for_each(pos, trace->ev_qualifier) {
1626 const char *sc = pos->s; 1747 const char *sc = pos->s;
1748 int id = audit_name_to_syscall(sc, trace->audit.machine);
1627 1749
1628 if (audit_name_to_syscall(sc, trace->audit.machine) < 0) { 1750 if (id < 0) {
1629 if (err == 0) { 1751 if (err == 0) {
1630 fputs("Error:\tInvalid syscall ", trace->output); 1752 fputs("Error:\tInvalid syscall ", trace->output);
1631 err = -EINVAL; 1753 err = -EINVAL;
@@ -1635,13 +1757,17 @@ static int trace__validate_ev_qualifier(struct trace *trace)
1635 1757
1636 fputs(sc, trace->output); 1758 fputs(sc, trace->output);
1637 } 1759 }
1760
1761 trace->ev_qualifier_ids.entries[i++] = id;
1638 } 1762 }
1639 1763
1640 if (err < 0) { 1764 if (err < 0) {
1641 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" 1765 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1642 "\nHint:\tand: 'man syscalls'\n", trace->output); 1766 "\nHint:\tand: 'man syscalls'\n", trace->output);
1767 zfree(&trace->ev_qualifier_ids.entries);
1768 trace->ev_qualifier_ids.nr = 0;
1643 } 1769 }
1644 1770out:
1645 return err; 1771 return err;
1646} 1772}
1647 1773
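trace__validate_ev_qualifier() now both validates the names and collects their syscall ids, continuing past failures so that every invalid name is reported in a single pass. A sketch of that pattern, with a toy lookup() standing in for audit_name_to_syscall():

#include <stdio.h>
#include <string.h>

/* Toy stand-in for audit_name_to_syscall(): returns an id or -1. */
static int lookup(const char *name)
{
	static const char *tbl[] = { "read", "write", "open" };

	for (int i = 0; i < 3; ++i)
		if (!strcmp(name, tbl[i]))
			return i;
	return -1;
}

int main(void)
{
	const char *names[] = { "read", "frobnicate", "open" };
	int ids[3], nr = 0, err = 0;

	for (int i = 0; i < 3; ++i) {
		int id = lookup(names[i]);

		if (id < 0) {
			/* print the prefix once, then comma-separate */
			fprintf(stderr, "%s%s", err ? ", " :
				"Error:\tInvalid syscall ", names[i]);
			err = -1;
			continue;
		}
		ids[nr++] = id;		/* keep the ids for filtering later */
	}
	if (err)
		fputc('\n', stderr);
	return err ? 1 : 0;
}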
@@ -1833,9 +1959,6 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1833 if (sc == NULL) 1959 if (sc == NULL)
1834 return -1; 1960 return -1;
1835 1961
1836 if (sc->filtered)
1837 return 0;
1838
1839 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1962 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1840 ttrace = thread__trace(thread, trace->output); 1963 ttrace = thread__trace(thread, trace->output);
1841 if (ttrace == NULL) 1964 if (ttrace == NULL)
@@ -1844,7 +1967,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1844 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1967 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1845 1968
1846 if (ttrace->entry_str == NULL) { 1969 if (ttrace->entry_str == NULL) {
1847 ttrace->entry_str = malloc(1024); 1970 ttrace->entry_str = malloc(trace__entry_str_size);
1848 if (!ttrace->entry_str) 1971 if (!ttrace->entry_str)
1849 goto out_put; 1972 goto out_put;
1850 } 1973 }
@@ -1854,9 +1977,9 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1854 1977
1855 ttrace->entry_time = sample->time; 1978 ttrace->entry_time = sample->time;
1856 msg = ttrace->entry_str; 1979 msg = ttrace->entry_str;
1857 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); 1980 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1858 1981
1859 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, 1982 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1860 args, trace, thread); 1983 args, trace, thread);
1861 1984
1862 if (sc->is_exit) { 1985 if (sc->is_exit) {
@@ -1864,8 +1987,11 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1864 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); 1987 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1865 fprintf(trace->output, "%-70s\n", ttrace->entry_str); 1988 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1866 } 1989 }
1867 } else 1990 } else {
1868 ttrace->entry_pending = true; 1991 ttrace->entry_pending = true;
1992 /* See trace__vfs_getname & trace__sys_exit */
1993 ttrace->filename.pending_open = false;
1994 }
1869 1995
1870 if (trace->current != thread) { 1996 if (trace->current != thread) {
1871 thread__put(trace->current); 1997 thread__put(trace->current);
@@ -1891,9 +2017,6 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1891 if (sc == NULL) 2017 if (sc == NULL)
1892 return -1; 2018 return -1;
1893 2019
1894 if (sc->filtered)
1895 return 0;
1896
1897 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 2020 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1898 ttrace = thread__trace(thread, trace->output); 2021 ttrace = thread__trace(thread, trace->output);
1899 if (ttrace == NULL) 2022 if (ttrace == NULL)
@@ -1904,9 +2027,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1904 2027
1905 ret = perf_evsel__sc_tp_uint(evsel, ret, sample); 2028 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1906 2029
1907 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { 2030 if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
1908 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); 2031 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1909 trace->last_vfs_getname = NULL; 2032 ttrace->filename.pending_open = false;
1910 ++trace->stats.vfs_getname; 2033 ++trace->stats.vfs_getname;
1911 } 2034 }
1912 2035
@@ -1961,7 +2084,56 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1961 union perf_event *event __maybe_unused, 2084 union perf_event *event __maybe_unused,
1962 struct perf_sample *sample) 2085 struct perf_sample *sample)
1963{ 2086{
1964 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); 2087 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2088 struct thread_trace *ttrace;
2089 size_t filename_len, entry_str_len, to_move;
2090 ssize_t remaining_space;
2091 char *pos;
2092 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2093
2094 if (!thread)
2095 goto out;
2096
2097 ttrace = thread__priv(thread);
2098 if (!ttrace)
2099 goto out;
2100
2101 filename_len = strlen(filename);
2102
2103 if (ttrace->filename.namelen < filename_len) {
2104 char *f = realloc(ttrace->filename.name, filename_len + 1);
2105
2106 if (f == NULL)
2107 goto out;
2108
2109 ttrace->filename.namelen = filename_len;
2110 ttrace->filename.name = f;
2111 }
2112
2113 strcpy(ttrace->filename.name, filename);
2114 ttrace->filename.pending_open = true;
2115
2116 if (!ttrace->filename.ptr)
2117 goto out;
2118
2119 entry_str_len = strlen(ttrace->entry_str);
2120 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2121 if (remaining_space <= 0)
2122 goto out;
2123
2124 if (filename_len > (size_t)remaining_space) {
2125 filename += filename_len - remaining_space;
2126 filename_len = remaining_space;
2127 }
2128
2129 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2130 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2131 memmove(pos + filename_len, pos, to_move);
2132 memcpy(pos, filename, filename_len);
2133
2134 ttrace->filename.ptr = 0;
2135 ttrace->filename.entry_str_pos = 0;
2136out:
1965 return 0; 2137 return 0;
1966} 2138}
1967 2139
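When the vfs_getname probe fires, the handler above copies the pathname into per-thread storage and, if an entry string is pending, splices it in at the remembered offset: the tail of the buffer is shifted with memmove() and the name copied into the gap, truncating the name from the left when space runs short. A compilable sketch of just the splice:

#include <stdio.h>
#include <string.h>

/* Insert a late-resolved filename at a position remembered when the
 * syscall entry line was formatted. */
static void splice_filename(char *buf, size_t size, size_t pos,
			    const char *name)
{
	size_t len = strlen(buf);
	long room = (long)size - (long)len - 1;		/* keep the \0 */
	size_t nlen = strlen(name);

	if (room <= 0)
		return;
	if (nlen > (size_t)room) {	/* keep the tail of the name */
		name += nlen - room;
		nlen = room;
	}
	memmove(buf + pos + nlen, buf + pos, len - pos + 1);
	memcpy(buf + pos, name, nlen);
}

int main(void)
{
	char entry[64] = "open(filename: , flags: CLOEXEC)";
	size_t pos = strlen("open(filename: "); /* recorded at sys_enter */

	splice_filename(entry, sizeof(entry), pos, "/etc/passwd");
	puts(entry);	/* open(filename: /etc/passwd, flags: CLOEXEC) */
	return 0;
}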
@@ -2214,19 +2386,20 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
2214 2386
2215static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 2387static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2216 2388
2217static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 2389static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2218{ 2390{
2219 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); 2391 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2220 if (evsel == NULL) 2392 if (evsel == NULL)
2221 return; 2393 return false;
2222 2394
2223 if (perf_evsel__field(evsel, "pathname") == NULL) { 2395 if (perf_evsel__field(evsel, "pathname") == NULL) {
2224 perf_evsel__delete(evsel); 2396 perf_evsel__delete(evsel);
2225 return; 2397 return false;
2226 } 2398 }
2227 2399
2228 evsel->handler = trace__vfs_getname; 2400 evsel->handler = trace__vfs_getname;
2229 perf_evlist__add(evlist, evsel); 2401 perf_evlist__add(evlist, evsel);
2402 return true;
2230} 2403}
2231 2404
2232static int perf_evlist__add_pgfault(struct perf_evlist *evlist, 2405static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
@@ -2283,9 +2456,68 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
2283 } 2456 }
2284} 2457}
2285 2458
2459static int trace__add_syscall_newtp(struct trace *trace)
2460{
2461 int ret = -1;
2462 struct perf_evlist *evlist = trace->evlist;
2463 struct perf_evsel *sys_enter, *sys_exit;
2464
2465 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2466 if (sys_enter == NULL)
2467 goto out;
2468
2469 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2470 goto out_delete_sys_enter;
2471
2472 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2473 if (sys_exit == NULL)
2474 goto out_delete_sys_enter;
2475
2476 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2477 goto out_delete_sys_exit;
2478
2479 perf_evlist__add(evlist, sys_enter);
2480 perf_evlist__add(evlist, sys_exit);
2481
2482 trace->syscalls.events.sys_enter = sys_enter;
2483 trace->syscalls.events.sys_exit = sys_exit;
2484
2485 ret = 0;
2486out:
2487 return ret;
2488
2489out_delete_sys_exit:
2490 perf_evsel__delete_priv(sys_exit);
2491out_delete_sys_enter:
2492 perf_evsel__delete_priv(sys_enter);
2493 goto out;
2494}
2495
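trace__add_syscall_newtp() uses the kernel's usual goto-based unwinding: each failure label releases exactly the resources acquired before the failing step, and both the success and failure paths leave through a single exit point. A minimal sketch of the shape, with a hypothetical make_res() standing in for perf_evsel__syscall_newtp():

#include <stdio.h>
#include <stdlib.h>

struct res { int dummy; };		/* stand-in for a struct perf_evsel */

static struct res *make_res(void)	/* stand-in for the constructor */
{
	return calloc(1, sizeof(struct res));
}

static int setup_pair(struct res **enter, struct res **exit_ev)
{
	int ret = -1;
	struct res *a, *b;

	a = make_res();
	if (a == NULL)
		goto out;

	b = make_res();
	if (b == NULL)
		goto out_delete_a;

	*enter = a;
	*exit_ev = b;
	ret = 0;
out:
	return ret;

out_delete_a:	/* frees only what was already acquired */
	free(a);
	goto out;
}

int main(void)
{
	struct res *enter, *exit_ev;

	if (setup_pair(&enter, &exit_ev) == 0) {
		puts("both tracepoint stand-ins created");
		free(enter);
		free(exit_ev);
	}
	return 0;
}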
2496static int trace__set_ev_qualifier_filter(struct trace *trace)
2497{
2498 int err = -1;
2499 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2500 trace->ev_qualifier_ids.nr,
2501 trace->ev_qualifier_ids.entries);
2502
2503 if (filter == NULL)
2504 goto out_enomem;
2505
2506 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2507 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2508
2509 free(filter);
2510out:
2511 return err;
2512out_enomem:
2513 errno = ENOMEM;
2514 goto out;
2515}
2516
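Instead of dropping filtered syscalls at read time (the removed sc->filtered logic), the qualifier is now pushed into the kernel as a tracepoint filter on the common 'id' field of raw_syscalls:sys_enter/sys_exit, so unwanted events are never delivered. A sketch of building such an expression; the exact output format of asprintf_expr_inout_ints() is an assumption here:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Build "id == 1 || id == 2 ..." (in) or "id != 1 && id != 2 ..." (out). */
static char *expr_inout_ints(const char *var, int in, size_t nr,
			     const int *ids)
{
	const char *op = in ? "==" : "!=", *join = in ? " || " : " && ";
	size_t len = nr * (strlen(var) + 32) + 1, i;
	char *expr = malloc(len), *p = expr;

	if (expr == NULL)
		return NULL;

	expr[0] = '\0';
	for (i = 0; i < nr; ++i)
		p += snprintf(p, len - (p - expr), "%s%s %s %d",
			      i ? join : "", var, op, ids[i]);
	return expr;
}

int main(void)
{
	int ids[] = { 0, 1, 59 };	/* e.g. read, write, execve on x86_64 */
	char *f = expr_inout_ints("id", 1, 3, ids);

	puts(f);	/* id == 0 || id == 1 || id == 59 */
	free(f);
	return 0;
}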
2286static int trace__run(struct trace *trace, int argc, const char **argv) 2517static int trace__run(struct trace *trace, int argc, const char **argv)
2287{ 2518{
2288 struct perf_evlist *evlist = trace->evlist; 2519 struct perf_evlist *evlist = trace->evlist;
2520 struct perf_evsel *evsel;
2289 int err = -1, i; 2521 int err = -1, i;
2290 unsigned long before; 2522 unsigned long before;
2291 const bool forks = argc > 0; 2523 const bool forks = argc > 0;
@@ -2293,13 +2525,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
2293 2525
2294 trace->live = true; 2526 trace->live = true;
2295 2527
2296 if (trace->trace_syscalls && 2528 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2297 perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2298 trace__sys_exit))
2299 goto out_error_raw_syscalls; 2529 goto out_error_raw_syscalls;
2300 2530
2301 if (trace->trace_syscalls) 2531 if (trace->trace_syscalls)
2302 perf_evlist__add_vfs_getname(evlist); 2532 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2303 2533
2304 if ((trace->trace_pgfaults & TRACE_PFMAJ) && 2534 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2305 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { 2535 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
@@ -2356,11 +2586,22 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
2356 else if (thread_map__pid(evlist->threads, 0) == -1) 2586 else if (thread_map__pid(evlist->threads, 0) == -1)
2357 err = perf_evlist__set_filter_pid(evlist, getpid()); 2587 err = perf_evlist__set_filter_pid(evlist, getpid());
2358 2588
2359 if (err < 0) { 2589 if (err < 0)
2360 printf("err=%d,%s\n", -err, strerror(-err)); 2590 goto out_error_mem;
2361 exit(1); 2591
2592 if (trace->ev_qualifier_ids.nr > 0) {
2593 err = trace__set_ev_qualifier_filter(trace);
2594 if (err < 0)
2595 goto out_errno;
2596
2597 pr_debug("event qualifier tracepoint filter: %s\n",
2598 trace->syscalls.events.sys_exit->filter);
2362 } 2599 }
2363 2600
2601 err = perf_evlist__apply_filters(evlist, &evsel);
2602 if (err < 0)
2603 goto out_error_apply_filters;
2604
2364 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); 2605 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2365 if (err < 0) 2606 if (err < 0)
2366 goto out_error_mmap; 2607 goto out_error_mmap;
@@ -2462,10 +2703,21 @@ out_error_open:
2462out_error: 2703out_error:
2463 fprintf(trace->output, "%s\n", errbuf); 2704 fprintf(trace->output, "%s\n", errbuf);
2464 goto out_delete_evlist; 2705 goto out_delete_evlist;
2706
2707out_error_apply_filters:
2708 fprintf(trace->output,
2709 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2710 evsel->filter, perf_evsel__name(evsel), errno,
2711 strerror_r(errno, errbuf, sizeof(errbuf)));
2712 goto out_delete_evlist;
2465} 2713}
2466out_error_mem: 2714out_error_mem:
2467 fprintf(trace->output, "Not enough memory to run!\n"); 2715 fprintf(trace->output, "Not enough memory to run!\n");
2468 goto out_delete_evlist; 2716 goto out_delete_evlist;
2717
2718out_errno:
2719 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2720 goto out_delete_evlist;
2469} 2721}
2470 2722
2471static int trace__replay(struct trace *trace) 2723static int trace__replay(struct trace *trace)
@@ -2586,9 +2838,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
2586 2838
2587 printed += fprintf(fp, "\n"); 2839 printed += fprintf(fp, "\n");
2588 2840
2589 printed += fprintf(fp, " syscall calls min avg max stddev\n"); 2841 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2590 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); 2842 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2591 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); 2843 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2592 2844
2593 /* each int_node is a syscall */ 2845 /* each int_node is a syscall */
2594 while (inode) { 2846 while (inode) {
@@ -2605,8 +2857,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
2605 2857
2606 sc = &trace->syscalls.table[inode->i]; 2858 sc = &trace->syscalls.table[inode->i];
2607 printed += fprintf(fp, " %-15s", sc->name); 2859 printed += fprintf(fp, " %-15s", sc->name);
2608 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", 2860 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2609 n, min, avg); 2861 n, avg * n, min, avg);
2610 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 2862 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2611 } 2863 }
2612 2864
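The summary gains a 'total' column derived from values already tracked: total time spent in a syscall is simply the mean duration times the call count, printed in milliseconds alongside min/avg/max. For instance:

#include <stdio.h>

int main(void)
{
	unsigned long n = 42;	/* calls */
	double avg = 0.125;	/* mean duration, msec */

	/* calls, total, avg -- matching the widths used above */
	printf(" %8lu %9.3f %9.3f\n", n, avg * n, avg);
	return 0;
}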
@@ -2778,7 +3030,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2778 .mmap_pages = UINT_MAX, 3030 .mmap_pages = UINT_MAX,
2779 .proc_map_timeout = 500, 3031 .proc_map_timeout = 500,
2780 }, 3032 },
2781 .output = stdout, 3033 .output = stderr,
2782 .show_comm = true, 3034 .show_comm = true,
2783 .trace_syscalls = true, 3035 .trace_syscalls = true,
2784 }; 3036 };
@@ -2879,11 +3131,14 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2879 3131
2880 if (ev_qualifier_str != NULL) { 3132 if (ev_qualifier_str != NULL) {
2881 const char *s = ev_qualifier_str; 3133 const char *s = ev_qualifier_str;
3134 struct strlist_config slist_config = {
3135 .dirname = system_path(STRACE_GROUPS_DIR),
3136 };
2882 3137
2883 trace.not_ev_qualifier = *s == '!'; 3138 trace.not_ev_qualifier = *s == '!';
2884 if (trace.not_ev_qualifier) 3139 if (trace.not_ev_qualifier)
2885 ++s; 3140 ++s;
2886 trace.ev_qualifier = strlist__new(true, s); 3141 trace.ev_qualifier = strlist__new(s, &slist_config);
2887 if (trace.ev_qualifier == NULL) { 3142 if (trace.ev_qualifier == NULL) {
2888 fputs("Not enough memory to parse event qualifier", 3143 fputs("Not enough memory to parse event qualifier",
2889 trace.output); 3144 trace.output);
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index d31fac19c30b..827557fc7511 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -11,7 +11,7 @@ ifneq ($(obj-perf),)
11obj-perf := $(abspath $(obj-perf))/ 11obj-perf := $(abspath $(obj-perf))/
12endif 12endif
13 13
14$(shell echo -n > $(OUTPUT).config-detected) 14$(shell printf "" > $(OUTPUT).config-detected)
15detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) 15detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
16detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) 16detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
17 17
@@ -297,7 +297,11 @@ ifndef NO_LIBELF
297 else 297 else
298 CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) 298 CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
299 LDFLAGS += $(LIBDW_LDFLAGS) 299 LDFLAGS += $(LIBDW_LDFLAGS)
300 EXTLIBS += -ldw 300 DWARFLIBS := -ldw
301 ifeq ($(findstring -static,${LDFLAGS}),-static)
302 DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
303 endif
304 EXTLIBS += ${DWARFLIBS}
301 $(call detected,CONFIG_DWARF) 305 $(call detected,CONFIG_DWARF)
302 endif # PERF_HAVE_DWARF_REGS 306 endif # PERF_HAVE_DWARF_REGS
303 endif # NO_DWARF 307 endif # NO_DWARF
@@ -644,6 +648,7 @@ infodir = share/info
644perfexecdir = libexec/perf-core 648perfexecdir = libexec/perf-core
645sharedir = $(prefix)/share 649sharedir = $(prefix)/share
646template_dir = share/perf-core/templates 650template_dir = share/perf-core/templates
651STRACE_GROUPS_DIR = share/perf-core/strace/groups
647htmldir = share/doc/perf-doc 652htmldir = share/doc/perf-doc
648ifeq ($(prefix),/usr) 653ifeq ($(prefix),/usr)
649sysconfdir = /etc 654sysconfdir = /etc
@@ -663,6 +668,7 @@ libdir = $(prefix)/$(lib)
663 668
664# Shell quote (do not use $(call) to accommodate ancient setups); 669# Shell quote (do not use $(call) to accommodate ancient setups);
665ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) 670ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
671STRACE_GROUPS_DIR_SQ = $(subst ','\'',$(STRACE_GROUPS_DIR))
666DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) 672DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
667bindir_SQ = $(subst ','\'',$(bindir)) 673bindir_SQ = $(subst ','\'',$(bindir))
668mandir_SQ = $(subst ','\'',$(mandir)) 674mandir_SQ = $(subst ','\'',$(mandir))
@@ -676,10 +682,13 @@ libdir_SQ = $(subst ','\'',$(libdir))
676 682
677ifneq ($(filter /%,$(firstword $(perfexecdir))),) 683ifneq ($(filter /%,$(firstword $(perfexecdir))),)
678perfexec_instdir = $(perfexecdir) 684perfexec_instdir = $(perfexecdir)
685STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
679else 686else
680perfexec_instdir = $(prefix)/$(perfexecdir) 687perfexec_instdir = $(prefix)/$(perfexecdir)
688STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
681endif 689endif
682perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) 690perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
691STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR))
683 692
684# If we install to $(HOME) we keep the traceevent default: 693# If we install to $(HOME) we keep the traceevent default:
685# $(HOME)/.traceevent/plugins 694# $(HOME)/.traceevent/plugins
@@ -713,6 +722,7 @@ $(call detected_var,htmldir_SQ)
713$(call detected_var,infodir_SQ) 722$(call detected_var,infodir_SQ)
714$(call detected_var,mandir_SQ) 723$(call detected_var,mandir_SQ)
715$(call detected_var,ETC_PERFCONFIG_SQ) 724$(call detected_var,ETC_PERFCONFIG_SQ)
725$(call detected_var,STRACE_GROUPS_DIR_SQ)
716$(call detected_var,prefix_SQ) 726$(call detected_var,prefix_SQ)
717$(call detected_var,perfexecdir_SQ) 727$(call detected_var,perfexecdir_SQ)
718$(call detected_var,LIBDIR) 728$(call detected_var,LIBDIR)
diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh
index c7ff90a90e4e..7e47a7cbc195 100644
--- a/tools/perf/perf-with-kcore.sh
+++ b/tools/perf/perf-with-kcore.sh
@@ -50,7 +50,7 @@ copy_kcore()
50 fi 50 fi
51 51
52 rm -f perf.data.junk 52 rm -f perf.data.junk
53 ("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null & 53 ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null &
54 PERF_PID=$! 54 PERF_PID=$!
55 55
56 # Need to make sure that perf has started 56 # Need to make sure that perf has started
@@ -160,18 +160,18 @@ record()
160 echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2 160 echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2
161 fi 161 fi
162 162
163 if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then 163 if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
164 echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2 164 echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2
165 fi 165 fi
166 166
167 if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then 167 if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
168 if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then 168 if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then
169 echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2 169 echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2
170 fi 170 fi
171 171
172 if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then 172 if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
173 true 173 true
174 elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then 174 elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
175 true 175 true
176 elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then 176 elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then
177 echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2 177 echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2
@@ -193,8 +193,8 @@ record()
193 193
194 mkdir "$PERF_DATA_DIR" 194 mkdir "$PERF_DATA_DIR"
195 195
196 echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*" 196 echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@"
197 "$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true 197 "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true
198 198
199 if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then 199 if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then
200 exit 1 200 exit 1
@@ -209,8 +209,8 @@ subcommand()
209{ 209{
210 find_perf 210 find_perf
211 check_buildid_cache_permissions 211 check_buildid_cache_permissions
212 echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*" 212 echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@"
213 "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $* 213 "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@"
214} 214}
215 215
216if [ "$1" = "fix_buildid_cache_permissions" ] ; then 216if [ "$1" = "fix_buildid_cache_permissions" ] ; then
@@ -234,7 +234,7 @@ fi
234case "$PERF_SUB_COMMAND" in 234case "$PERF_SUB_COMMAND" in
235"record") 235"record")
236 while [ "$1" != "--" ] ; do 236 while [ "$1" != "--" ] ; do
237 PERF_OPTIONS+="$1 " 237 PERF_OPTIONS+=("$1")
238 shift || break 238 shift || break
239 done 239 done
240 if [ "$1" != "--" ] ; then 240 if [ "$1" != "--" ] ; then
@@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in
242 usage 242 usage
243 fi 243 fi
244 shift 244 shift
245 record $* 245 record "$@"
246;; 246;;
247"script") 247"script")
248 subcommand $* 248 subcommand "$@"
249;; 249;;
250"report") 250"report")
251 subcommand $* 251 subcommand "$@"
252;; 252;;
253"inject") 253"inject")
254 subcommand $* 254 subcommand "$@"
255;; 255;;
256*) 256*)
257 usage 257 usage
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index b857fcbd00cf..07dbff5c0e60 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -231,7 +231,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
231 (*argc)--; 231 (*argc)--;
232 } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { 232 } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) {
233 perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR)); 233 perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR));
234 fprintf(stderr, "dir: %s\n", debugfs_mountpoint); 234 fprintf(stderr, "dir: %s\n", tracing_path);
235 if (envchanged) 235 if (envchanged)
236 *envchanged = 1; 236 *envchanged = 1;
237 } else if (!strcmp(cmd, "--list-cmds")) { 237 } else if (!strcmp(cmd, "--list-cmds")) {
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 4a5827fff799..cccb4cf575d3 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -51,11 +51,14 @@ struct record_opts {
51 bool sample_address; 51 bool sample_address;
52 bool sample_weight; 52 bool sample_weight;
53 bool sample_time; 53 bool sample_time;
54 bool sample_time_set;
55 bool callgraph_set;
54 bool period; 56 bool period;
55 bool sample_intr_regs; 57 bool sample_intr_regs;
56 bool running_time; 58 bool running_time;
57 bool full_auxtrace; 59 bool full_auxtrace;
58 bool auxtrace_snapshot_mode; 60 bool auxtrace_snapshot_mode;
61 bool record_switch_events;
59 unsigned int freq; 62 unsigned int freq;
60 unsigned int mmap_pages; 63 unsigned int mmap_pages;
61 unsigned int auxtrace_mmap_pages; 64 unsigned int auxtrace_mmap_pages;
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
index 2225162ee1fc..b9d508336ae6 100755
--- a/tools/perf/python/twatch.py
+++ b/tools/perf/python/twatch.py
@@ -18,10 +18,20 @@ import perf
18def main(): 18def main():
19 cpus = perf.cpu_map() 19 cpus = perf.cpu_map()
20 threads = perf.thread_map() 20 threads = perf.thread_map()
21 evsel = perf.evsel(task = 1, comm = 1, mmap = 0, 21 evsel = perf.evsel(type = perf.TYPE_SOFTWARE,
22 config = perf.COUNT_SW_DUMMY,
23 task = 1, comm = 1, mmap = 0, freq = 0,
22 wakeup_events = 1, watermark = 1, 24 wakeup_events = 1, watermark = 1,
23 sample_id_all = 1, 25 sample_id_all = 1,
24 sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU) 26 sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU)
27
28 """What we want are just the PERF_RECORD_ lifetime events for threads,
29 using the default, PERF_TYPE_HARDWARE + PERF_COUNT_HW_CYCLES & freq=1
30 (the default), makes perf reenable irq_vectors:local_timer_entry, when
31 disabling nohz, not good for some use cases where all we want is to get
32 threads comes and goes... So use (perf.TYPE_SOFTWARE, perf_COUNT_SW_DUMMY,
33 freq=0) instead."""
34
25 evsel.open(cpus = cpus, threads = threads); 35 evsel.open(cpus = cpus, threads = threads);
26 evlist = perf.evlist(cpus, threads) 36 evlist = perf.evlist(cpus, threads)
27 evlist.add(evsel) 37 evlist.add(evsel)
diff --git a/tools/perf/scripts/python/bin/compaction-times-record b/tools/perf/scripts/python/bin/compaction-times-record
new file mode 100644
index 000000000000..6edcd40e14e8
--- /dev/null
+++ b/tools/perf/scripts/python/bin/compaction-times-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -e compaction:mm_compaction_begin -e compaction:mm_compaction_end -e compaction:mm_compaction_migratepages -e compaction:mm_compaction_isolate_migratepages -e compaction:mm_compaction_isolate_freepages "$@"
diff --git a/tools/perf/scripts/python/bin/compaction-times-report b/tools/perf/scripts/python/bin/compaction-times-report
new file mode 100644
index 000000000000..3dc13897cfde
--- /dev/null
+++ b/tools/perf/scripts/python/bin/compaction-times-report
@@ -0,0 +1,4 @@
1#!/bin/bash
2#description: display time taken by mm compaction
3#args: [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]
4perf script -s "$PERF_EXEC_PATH"/scripts/python/compaction-times.py "$@"
diff --git a/tools/perf/scripts/python/call-graph-from-postgresql.py b/tools/perf/scripts/python/call-graph-from-postgresql.py
new file mode 100644
index 000000000000..e78fdc2a5a9d
--- /dev/null
+++ b/tools/perf/scripts/python/call-graph-from-postgresql.py
@@ -0,0 +1,327 @@
1#!/usr/bin/python2
2# call-graph-from-postgresql.py: create call-graph from postgresql database
3# Copyright (c) 2014, Intel Corporation.
4#
5# This program is free software; you can redistribute it and/or modify it
6# under the terms and conditions of the GNU General Public License,
7# version 2, as published by the Free Software Foundation.
8#
9# This program is distributed in the hope it will be useful, but WITHOUT
10# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12# more details.
13
14# To use this script you will need to have exported data using the
15# export-to-postgresql.py script. Refer to that script for details.
16#
17# Following on from the example in the export-to-postgresql.py script, a
18# call-graph can be displayed for the pt_example database like this:
19#
20# python tools/perf/scripts/python/call-graph-from-postgresql.py pt_example
21#
22# Note this script supports connecting to remote databases by setting hostname,
23# port, username, password, and dbname, e.g.
24#
25# python tools/perf/scripts/python/call-graph-from-postgresql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
26#
27# The result is a GUI window with a tree representing a context-sensitive
28# call-graph. Expanding a couple of levels of the tree and adjusting column
29# widths to suit will display something like:
30#
31# Call Graph: pt_example
32# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%)
33# v- ls
34# v- 2638:2638
35# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0
36# |- unknown unknown 1 13198 0.1 1 0.0
37# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3
38# >- _dl_init_internal ld-2.19.so 1 448152 4.4 11094 5.3
39# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4
40# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1
41# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0
42# >- __libc_csu_init ls 1 10354 0.1 10 0.0
43# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0
44# v- main ls 1 8182043 99.6 180254 99.9
45#
46# Points to note:
47# The top level is a command name (comm)
48# The next level is a thread (pid:tid)
49# Subsequent levels are functions
50# 'Count' is the number of calls
51# 'Time' is the elapsed time until the function returns
52# Percentages are relative to the level above
53# 'Branch Count' is the total number of branches for that function and all
54# functions that it calls
55
56import sys
57from PySide.QtCore import *
58from PySide.QtGui import *
59from PySide.QtSql import *
60from decimal import *
61
62class TreeItem():
63
64 def __init__(self, db, row, parent_item):
65 self.db = db
66 self.row = row
67 self.parent_item = parent_item
68 self.query_done = False
69 self.child_count = 0
70 self.child_items = []
71 self.data = ["", "", "", "", "", "", ""]
72 self.comm_id = 0
73 self.thread_id = 0
74 self.call_path_id = 1
75 self.branch_count = 0
76 self.time = 0
77 if not parent_item:
78 self.setUpRoot()
79
80 def setUpRoot(self):
81 self.query_done = True
82 query = QSqlQuery(self.db)
83 ret = query.exec_('SELECT id, comm FROM comms')
84 if not ret:
85 raise Exception("Query failed: " + query.lastError().text())
86 while query.next():
87 if not query.value(0):
88 continue
89 child_item = TreeItem(self.db, self.child_count, self)
90 self.child_items.append(child_item)
91 self.child_count += 1
92 child_item.setUpLevel1(query.value(0), query.value(1))
93
94 def setUpLevel1(self, comm_id, comm):
95 self.query_done = True
96 self.comm_id = comm_id
97 self.data[0] = comm
98 self.child_items = []
99 self.child_count = 0
100 query = QSqlQuery(self.db)
101 ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
102 if not ret:
103 raise Exception("Query failed: " + query.lastError().text())
104 while query.next():
105 child_item = TreeItem(self.db, self.child_count, self)
106 self.child_items.append(child_item)
107 self.child_count += 1
108 child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))
109
110 def setUpLevel2(self, comm_id, thread_id, pid, tid):
111 self.comm_id = comm_id
112 self.thread_id = thread_id
113 self.data[0] = str(pid) + ":" + str(tid)
114
115 def getChildItem(self, row):
116 return self.child_items[row]
117
118 def getParentItem(self):
119 return self.parent_item
120
121 def getRow(self):
122 return self.row
123
124 def timePercent(self, b):
125 if not self.time:
126 return "0.0"
127 x = (b * Decimal(100)) / self.time
128 return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
129
130 def branchPercent(self, b):
131 if not self.branch_count:
132 return "0.0"
133 x = (b * Decimal(100)) / self.branch_count
134 return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
135
136 def addChild(self, call_path_id, name, dso, count, time, branch_count):
137 child_item = TreeItem(self.db, self.child_count, self)
138 child_item.comm_id = self.comm_id
139 child_item.thread_id = self.thread_id
140 child_item.call_path_id = call_path_id
141 child_item.branch_count = branch_count
142 child_item.time = time
143 child_item.data[0] = name
144 if dso == "[kernel.kallsyms]":
145 dso = "[kernel]"
146 child_item.data[1] = dso
147 child_item.data[2] = str(count)
148 child_item.data[3] = str(time)
149 child_item.data[4] = self.timePercent(time)
150 child_item.data[5] = str(branch_count)
151 child_item.data[6] = self.branchPercent(branch_count)
152 self.child_items.append(child_item)
153 self.child_count += 1
154
155 def selectCalls(self):
156 self.query_done = True
157 query = QSqlQuery(self.db)
158 ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
159 '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
160 '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
161 '( SELECT ip FROM call_paths where id = call_path_id ) '
162 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
163 ' ORDER BY call_path_id')
164 if not ret:
165 raise Exception("Query failed: " + query.lastError().text())
166 last_call_path_id = 0
167 name = ""
168 dso = ""
169 count = 0
170 branch_count = 0
171 total_branch_count = 0
172 time = 0
173 total_time = 0
174 while query.next():
175 if query.value(1) == last_call_path_id:
176 count += 1
177 branch_count += query.value(2)
178 time += query.value(4) - query.value(3)
179 else:
180 if count:
181 self.addChild(last_call_path_id, name, dso, count, time, branch_count)
182 last_call_path_id = query.value(1)
183 name = query.value(5)
184 dso = query.value(6)
185 count = 1
186 total_branch_count += branch_count
187 total_time += time
188 branch_count = query.value(2)
189 time = query.value(4) - query.value(3)
190 if count:
191 self.addChild(last_call_path_id, name, dso, count, time, branch_count)
192 total_branch_count += branch_count
193 total_time += time
194 # Top level does not have time or branch count, so fix that here
195 if total_branch_count > self.branch_count:
196 self.branch_count = total_branch_count
197 if self.branch_count:
198 for child_item in self.child_items:
199 child_item.data[6] = self.branchPercent(child_item.branch_count)
200 if total_time > self.time:
201 self.time = total_time
202 if self.time:
203 for child_item in self.child_items:
204 child_item.data[4] = self.timePercent(child_item.time)
205
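# The fold over consecutive rows in selectCalls() above depends on the
# ORDER BY clause and is equivalent to letting the database aggregate,
# e.g. (a sketch against the same schema, not what the script executes):
#
#   query.exec_('SELECT call_path_id, count(*), sum(branch_count), '
#               'sum(return_time - call_time) FROM calls '
#               'WHERE parent_call_path_id = ' + str(self.call_path_id) +
#               ' GROUP BY call_path_id ORDER BY call_path_id')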
206 def childCount(self):
207 if not self.query_done:
208 self.selectCalls()
209 return self.child_count
210
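# childCount() is what keeps the tree lazy: a node's calls are only
# fetched (via selectCalls) the first time the view asks how many
# children it has, rather than all at once up front.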
211 def columnCount(self):
212 return 7
213
214 def columnHeader(self, column):
215 headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
216 return headers[column]
217
218 def getData(self, column):
219 return self.data[column]
220
221class TreeModel(QAbstractItemModel):
222
223 def __init__(self, db, parent=None):
224 super(TreeModel, self).__init__(parent)
225 self.db = db
226 self.root = TreeItem(db, 0, None)
227
228 def columnCount(self, parent):
229 return self.root.columnCount()
230
231 def rowCount(self, parent):
232 if parent.isValid():
233 parent_item = parent.internalPointer()
234 else:
235 parent_item = self.root
236 return parent_item.childCount()
237
238 def headerData(self, section, orientation, role):
239 if role == Qt.TextAlignmentRole:
240 if section > 1:
241 return Qt.AlignRight
242 if role != Qt.DisplayRole:
243 return None
244 if orientation != Qt.Horizontal:
245 return None
246 return self.root.columnHeader(section)
247
248 def parent(self, child):
249 child_item = child.internalPointer()
250 if child_item is self.root:
251 return QModelIndex()
252 parent_item = child_item.getParentItem()
253 return self.createIndex(parent_item.getRow(), 0, parent_item)
254
255 def index(self, row, column, parent):
256 if parent.isValid():
257 parent_item = parent.internalPointer()
258 else:
259 parent_item = self.root
260 child_item = parent_item.getChildItem(row)
261 return self.createIndex(row, column, child_item)
262
263 def data(self, index, role):
264 if role == Qt.TextAlignmentRole:
265 if index.column() > 1:
266 return Qt.AlignRight
267 if role != Qt.DisplayRole:
268 return None
269 index_item = index.internalPointer()
270 return index_item.getData(index.column())
271
272class MainWindow(QMainWindow):
273
274 def __init__(self, db, dbname, parent=None):
275 super(MainWindow, self).__init__(parent)
276
277 self.setObjectName("MainWindow")
278 self.setWindowTitle("Call Graph: " + dbname)
279 self.move(100, 100)
280 self.resize(800, 600)
281 style = self.style()
282 icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
283 self.setWindowIcon(icon)
284
285 self.model = TreeModel(db)
286
287 self.view = QTreeView()
288 self.view.setModel(self.model)
289
290 self.setCentralWidget(self.view)
291
292if __name__ == '__main__':
293 if (len(sys.argv) < 2):
294 print >> sys.stderr, "Usage is: call-graph-from-postgresql.py <database name>"
295 raise Exception("Too few arguments")
296
297 dbname = sys.argv[1]
298
299 db = QSqlDatabase.addDatabase('QPSQL')
300
301 opts = dbname.split()
302 for opt in opts:
303 if '=' in opt:
304 opt = opt.split('=')
305 if opt[0] == 'hostname':
306 db.setHostName(opt[1])
307 elif opt[0] == 'port':
308 db.setPort(int(opt[1]))
309 elif opt[0] == 'username':
310 db.setUserName(opt[1])
311 elif opt[0] == 'password':
312 db.setPassword(opt[1])
313 elif opt[0] == 'dbname':
314 dbname = opt[1]
315 else:
316 dbname = opt
317
318 db.setDatabaseName(dbname)
319 if not db.open():
320 raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
321
322 app = QApplication(sys.argv)
323 window = MainWindow(db, dbname)
324 window.show()
325 err = app.exec_()
326 db.close()
327 sys.exit(err)
diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py
new file mode 100644
index 000000000000..239cb0568ec3
--- /dev/null
+++ b/tools/perf/scripts/python/compaction-times.py
@@ -0,0 +1,311 @@
1# Report time spent in memory compaction
2# Licensed under the terms of the GNU GPL License version 2
3
4# testing:
5# 'echo 1 > /proc/sys/vm/compact_memory' to force compaction of all zones
6
7import os
8import sys
9import re
10
11import signal
12signal.signal(signal.SIGPIPE, signal.SIG_DFL)
13
14usage = "usage: perf script report compaction-times.py -- [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]\n"
15
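# For example (a hypothetical session; the record step assumes the
# compaction:* tracepoints are available on your kernel):
#
#   $ perf record -e 'compaction:*' -a -- sleep 10
#   $ perf script report compaction-times.py -- -p -m kswapd0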
16class popt:
17 DISP_DFL = 0
18 DISP_PROC = 1
19 DISP_PROC_VERBOSE = 2
20
21class topt:
22 DISP_TIME = 0
23 DISP_MIG = 1
24 DISP_ISOLFREE = 2
25 DISP_ISOLMIG = 4
26 DISP_ALL = 7
27
28class comm_filter:
29 def __init__(self, re):
30 self.re = re
31
32 def filter(self, pid, comm):
33 m = self.re.search(comm)
34 return m == None or m.group() == ""
35
36class pid_filter:
37 def __init__(self, low, high):
38 self.low = (0 if low == "" else int(low))
39 self.high = (0 if high == "" else int(high))
40
41 def filter(self, pid, comm):
42 return not (pid >= self.low and (self.high == 0 or pid <= self.high))
43
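# Note the inverted sense of filter(): it returns True for events that
# should be dropped. For instance (a sketch):
#
#   pid_filter("100", "200").filter(150, "x")   # -> False, event is kept
#   pid_filter("100", "200").filter(300, "x")   # -> True, event is dropped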
44def set_type(t):
45 global opt_disp
46 opt_disp = (t if opt_disp == topt.DISP_ALL else opt_disp|t)
47
48def ns(sec, nsec):
49 return (sec * 1000000000) + nsec
50
51def time(ns):
52 return "%dns" % ns if opt_ns else "%dus" % (round(ns, -3) / 1000)
53
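# A quick worked example: for a 12345ns stall, time(12345) yields
# "12345ns" by default, while with -u it rounds first, giving
# round(12345, -3) / 1000 = 12, i.e. "12us".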
54class pair:
55 def __init__(self, aval, bval, alabel = None, blabel = None):
56 self.alabel = alabel
57 self.blabel = blabel
58 self.aval = aval
59 self.bval = bval
60
61 def __add__(self, rhs):
62 self.aval += rhs.aval
63 self.bval += rhs.bval
64 return self
65
66 def __str__(self):
67 return "%s=%d %s=%d" % (self.alabel, self.aval, self.blabel, self.bval)
68
69class cnode:
70 def __init__(self, ns):
71 self.ns = ns
72 self.migrated = pair(0, 0, "moved", "failed")
73 self.fscan = pair(0, 0, "scanned", "isolated")
74 self.mscan = pair(0, 0, "scanned", "isolated")
75
76 def __add__(self, rhs):
77 self.ns += rhs.ns
78 self.migrated += rhs.migrated
79 self.fscan += rhs.fscan
80 self.mscan += rhs.mscan
81 return self
82
83 def __str__(self):
84 prev = 0
85 s = "%s " % time(self.ns)
86 if (opt_disp & topt.DISP_MIG):
87 s += "migration: %s" % self.migrated
88 prev = 1
89 if (opt_disp & topt.DISP_ISOLFREE):
90 s += "%sfree_scanner: %s" % (" " if prev else "", self.fscan)
91 prev = 1
92 if (opt_disp & topt.DISP_ISOLMIG):
93 s += "%smigration_scanner: %s" % (" " if prev else "", self.mscan)
94 return s
95
96 def complete(self, secs, nsecs):
97 self.ns = ns(secs, nsecs) - self.ns
98
99 def increment(self, migrated, fscan, mscan):
100 if (migrated != None):
101 self.migrated += migrated
102 if (fscan != None):
103 self.fscan += fscan
104 if (mscan != None):
105 self.mscan += mscan
106
107
108class chead:
109 heads = {}
110 val = cnode(0)
111 fobj = None
112
113 @classmethod
114 def add_filter(cls, filter):
115 cls.fobj = filter
116
117 @classmethod
118 def create_pending(cls, pid, comm, start_secs, start_nsecs):
119 filtered = 0
120 try:
121 head = cls.heads[pid]
122 filtered = head.is_filtered()
123 except KeyError:
124 if cls.fobj != None:
125 filtered = cls.fobj.filter(pid, comm)
126 head = cls.heads[pid] = chead(comm, pid, filtered)
127
128 if not filtered:
129 head.mark_pending(start_secs, start_nsecs)
130
131 @classmethod
132 def increment_pending(cls, pid, migrated, fscan, mscan):
133 head = cls.heads[pid]
134 if not head.is_filtered():
135 if head.is_pending():
136 head.do_increment(migrated, fscan, mscan)
137 else:
138 sys.stderr.write("missing start compaction event for pid %d\n" % pid)
139
140 @classmethod
141 def complete_pending(cls, pid, secs, nsecs):
142 head = cls.heads[pid]
143 if not head.is_filtered():
144 if head.is_pending():
145 head.make_complete(secs, nsecs)
146 else:
147 sys.stderr.write("missing start compaction event for pid %d\n" % pid)
148
149 @classmethod
150 def gen(cls):
151 if opt_proc != popt.DISP_DFL:
152 for i in cls.heads:
153 yield cls.heads[i]
154
155 @classmethod
156 def str(cls):
157 return cls.val
158
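# The trace handlers at the bottom of this file drive the pending-node
# lifecycle above, roughly (assuming begin/end events pair up per pid):
#
#   compaction__mm_compaction_begin         -> chead.create_pending()
#   compaction__mm_compaction_migratepages  -> chead.increment_pending()
#   compaction__mm_compaction_isolate_*     -> chead.increment_pending()
#   compaction__mm_compaction_end           -> chead.complete_pending()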
159 def __init__(self, comm, pid, filtered):
160 self.comm = comm
161 self.pid = pid
162 self.val = cnode(0)
163 self.pending = None
164 self.filtered = filtered
165 self.list = []
166
167 def __add__(self, rhs):
168 # note: chead has no 'ns' attribute; only the cnode totals are added
169 self.val += rhs.val
170 return self
171
172 def mark_pending(self, secs, nsecs):
173 self.pending = cnode(ns(secs, nsecs))
174
175 def do_increment(self, migrated, fscan, mscan):
176 self.pending.increment(migrated, fscan, mscan)
177
178 def make_complete(self, secs, nsecs):
179 self.pending.complete(secs, nsecs)
180 chead.val += self.pending
181
182 if opt_proc != popt.DISP_DFL:
183 self.val += self.pending
184
185 if opt_proc == popt.DISP_PROC_VERBOSE:
186 self.list.append(self.pending)
187 self.pending = None
188
189 def enumerate(self):
190 if opt_proc == popt.DISP_PROC_VERBOSE and not self.is_filtered():
191 for i, pelem in enumerate(self.list):
192 sys.stdout.write("%d[%s].%d: %s\n" % (self.pid, self.comm, i+1, pelem))
193
194 def is_pending(self):
195 return self.pending != None
196
197 def is_filtered(self):
198 return self.filtered
199
200 def display(self):
201 if not self.is_filtered():
202 sys.stdout.write("%d[%s]: %s\n" % (self.pid, self.comm, self.val))
203
204
205def trace_end():
206 sys.stdout.write("total: %s\n" % chead.str())
207 for i in chead.gen():
208 i.display()
209 i.enumerate()
210
211def compaction__mm_compaction_migratepages(event_name, context, common_cpu,
212 common_secs, common_nsecs, common_pid, common_comm,
213 common_callchain, nr_migrated, nr_failed):
214
215 chead.increment_pending(common_pid,
216 pair(nr_migrated, nr_failed), None, None)
217
218def compaction__mm_compaction_isolate_freepages(event_name, context, common_cpu,
219 common_secs, common_nsecs, common_pid, common_comm,
220 common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken):
221
222 chead.increment_pending(common_pid,
223 None, pair(nr_scanned, nr_taken), None)
224
225def compaction__mm_compaction_isolate_migratepages(event_name, context, common_cpu,
226 common_secs, common_nsecs, common_pid, common_comm,
227 common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken):
228
229 chead.increment_pending(common_pid,
230 None, None, pair(nr_scanned, nr_taken))
231
232def compaction__mm_compaction_end(event_name, context, common_cpu,
233 common_secs, common_nsecs, common_pid, common_comm,
234 common_callchain, zone_start, migrate_start, free_start, zone_end,
235 sync, status):
236
237 chead.complete_pending(common_pid, common_secs, common_nsecs)
238
239def compaction__mm_compaction_begin(event_name, context, common_cpu,
240 common_secs, common_nsecs, common_pid, common_comm,
241 common_callchain, zone_start, migrate_start, free_start, zone_end,
242 sync):
243
244 chead.create_pending(common_pid, common_comm, common_secs, common_nsecs)
245
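# Handler names above follow the perf script convention
# <subsystem>__<event_name>, and the common_* arguments are filled in by
# perf before the event's own fields.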
246def pr_help():
247 global usage
248
249 sys.stdout.write(usage)
250 sys.stdout.write("\n")
251 sys.stdout.write("-h display this help\n")
252 sys.stdout.write("-p display by process\n")
253 sys.stdout.write("-pv display by process (verbose)\n")
254 sys.stdout.write("-t display stall times only\n")
255 sys.stdout.write("-m display stats for migration\n")
256 sys.stdout.write("-fs display stats for free scanner\n")
257 sys.stdout.write("-ms display stats for migration scanner\n")
258 sys.stdout.write("-u display results in microseconds (default nanoseconds)\n")
259
260
261comm_re = None
262pid_re = None
263pid_regex = r"^(\d*)-(\d*)$|^(\d*)$"
264
265opt_proc = popt.DISP_DFL
266opt_disp = topt.DISP_ALL
267
268opt_ns = True
269
270argc = len(sys.argv) - 1
271if argc >= 1:
272 pid_re = re.compile(pid_regex)
273
274 for i, opt in enumerate(sys.argv[1:]):
275 if opt[0] == "-":
276 if opt == "-h":
277 pr_help()
278 exit(0)
279 elif opt == "-p":
280 opt_proc = popt.DISP_PROC
281 elif opt == "-pv":
282 opt_proc = popt.DISP_PROC_VERBOSE
283 elif opt == '-u':
284 opt_ns = False
285 elif opt == "-t":
286 set_type(topt.DISP_TIME)
287 elif opt == "-m":
288 set_type(topt.DISP_MIG)
289 elif opt == "-fs":
290 set_type(topt.DISP_ISOLFREE)
291 elif opt == "-ms":
292 set_type(topt.DISP_ISOLMIG)
293 else:
294 sys.exit(usage)
295
296 elif i == argc - 1:
297 m = pid_re.search(opt)
298 if m != None and m.group() != "":
299 if m.group(3) != None:
300 f = pid_filter(m.group(3), m.group(3))
301 else:
302 f = pid_filter(m.group(1), m.group(2))
303 else:
304 try:
305 comm_re = re.compile(opt)
306 except re.error:
307 sys.stderr.write("invalid regex '%s'\n" % opt)
308 sys.exit(usage)
309 f = comm_filter(comm_re)
310
311 chead.add_filter(f)
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 4cdafd880074..84a32037a80f 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -15,6 +15,53 @@ import sys
15import struct 15import struct
16import datetime 16import datetime
17 17
18# To use this script you will need to have installed the package python-pyside,
19# which provides LGPL-licensed Python bindings for Qt. You will also need the
20# package libqt4-sql-psql for Qt PostgreSQL support.
21#
22# The script assumes postgresql is running on the local machine and that the
23# user has postgresql permissions to create databases. Examples of installing
24# postgresql and adding such a user are:
25#
26# fedora:
27#
28# $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql
29# $ sudo su - postgres -c initdb
30# $ sudo service postgresql start
31# $ sudo su - postgres
32# $ createuser <your user id here>
33# Shall the new role be a superuser? (y/n) y
34#
35# ubuntu:
36#
37# $ sudo apt-get install postgresql
38# $ sudo su - postgres
39# $ createuser <your user id here>
40# Shall the new role be a superuser? (y/n) y
41#
42# An example of using this script with Intel PT:
43#
44# $ perf record -e intel_pt//u ls
45# $ perf script -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py pt_example branches calls
46# 2015-05-29 12:49:23.464364 Creating database...
47# 2015-05-29 12:49:26.281717 Writing to intermediate files...
48# 2015-05-29 12:49:27.190383 Copying to database...
49# 2015-05-29 12:49:28.140451 Removing intermediate files...
50# 2015-05-29 12:49:28.147451 Adding primary keys
51# 2015-05-29 12:49:28.655683 Adding foreign keys
52# 2015-05-29 12:49:29.365350 Done
53#
54# To browse the database, psql can be used, e.g.:
55#
56# $ psql pt_example
57# pt_example=# select * from samples_view where id < 100;
58# pt_example=# \d+
59# pt_example=# \d+ samples_view
60# pt_example=# \q
61#
62# An example of using the database is provided by the script
63# call-graph-from-postgresql.py. Refer to that script for details.
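# For instance, a query like the following (a sketch; it assumes the
# calls/call_paths/symbols tables created when the script is run with the
# 'calls' option) lists the functions with the most branches:
#
#   pt_example=# SELECT s.name, sum(c.branch_count) FROM calls c
#                JOIN call_paths p ON p.id = c.call_path_id
#                JOIN symbols s ON s.id = p.symbol_id
#                GROUP BY s.name ORDER BY 2 DESC LIMIT 10;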
64
18from PySide.QtSql import * 65from PySide.QtSql import *
19 66
20# Need to access PostgreSQL C library directly to use COPY FROM STDIN 67# Need to access PostgreSQL C library directly to use COPY FROM STDIN
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index d20d6e6ab65b..c1518bdd0f1b 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -32,6 +32,7 @@ perf-y += sample-parsing.o
32perf-y += parse-no-sample-id-all.o 32perf-y += parse-no-sample-id-all.o
33perf-y += kmod-path.o 33perf-y += kmod-path.o
34perf-y += thread-map.o 34perf-y += thread-map.o
35perf-y += llvm.o
35 36
36perf-$(CONFIG_X86) += perf-time-to-tsc.o 37perf-$(CONFIG_X86) += perf-time-to-tsc.o
37 38
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index c1dde733c3a6..136cd934be66 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -175,6 +175,10 @@ static struct test {
175 .func = test__thread_map, 175 .func = test__thread_map,
176 }, 176 },
177 { 177 {
178 .desc = "Test LLVM searching and compiling",
179 .func = test__llvm,
180 },
181 {
178 .func = NULL, 182 .func = NULL,
179 }, 183 },
180}; 184};
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 7d82c8be5e36..7ed737019de7 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -279,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
279 279
280 symbol_conf.use_callchain = false; 280 symbol_conf.use_callchain = false;
281 symbol_conf.cumulate_callchain = false; 281 symbol_conf.cumulate_callchain = false;
282 perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
282 283
283 setup_sorting(); 284 setup_sorting();
284 callchain_register_param(&callchain_param); 285 callchain_register_param(&callchain_param);
@@ -425,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
425 426
426 symbol_conf.use_callchain = true; 427 symbol_conf.use_callchain = true;
427 symbol_conf.cumulate_callchain = false; 428 symbol_conf.cumulate_callchain = false;
429 perf_evsel__set_sample_bit(evsel, CALLCHAIN);
428 430
429 setup_sorting(); 431 setup_sorting();
430 callchain_register_param(&callchain_param); 432 callchain_register_param(&callchain_param);
@@ -482,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
482 484
483 symbol_conf.use_callchain = false; 485 symbol_conf.use_callchain = false;
484 symbol_conf.cumulate_callchain = true; 486 symbol_conf.cumulate_callchain = true;
487 perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
485 488
486 setup_sorting(); 489 setup_sorting();
487 callchain_register_param(&callchain_param); 490 callchain_register_param(&callchain_param);
@@ -665,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
665 668
666 symbol_conf.use_callchain = true; 669 symbol_conf.use_callchain = true;
667 symbol_conf.cumulate_callchain = true; 670 symbol_conf.cumulate_callchain = true;
671 perf_evsel__set_sample_bit(evsel, CALLCHAIN);
668 672
669 setup_sorting(); 673 setup_sorting();
670 callchain_register_param(&callchain_param); 674 callchain_register_param(&callchain_param);
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
new file mode 100644
index 000000000000..a337356fd979
--- /dev/null
+++ b/tools/perf/tests/llvm.c
@@ -0,0 +1,98 @@
1#include <stdio.h>
2#include <bpf/libbpf.h>
3#include <util/llvm-utils.h>
4#include <util/cache.h>
5#include "tests.h"
6#include "debug.h"
7
8static int perf_config_cb(const char *var, const char *val,
9 void *arg __maybe_unused)
10{
11 return perf_default_config(var, val, arg);
12}
13
14/*
15 * Give it an arbitrary "version" section since we don't really load it
16 * into the kernel
17 */
18static const char test_bpf_prog[] =
19 "__attribute__((section(\"do_fork\"), used)) "
20 "int fork(void *ctx) {return 0;} "
21 "char _license[] __attribute__((section(\"license\"), used)) = \"GPL\";"
22 "int _version __attribute__((section(\"version\"), used)) = 0x40100;";
23
24#ifdef HAVE_LIBBPF_SUPPORT
25static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
26{
27 struct bpf_object *obj;
28
29 obj = bpf_object__open_buffer(obj_buf, obj_buf_sz);
30 if (!obj)
31 return -1;
32 bpf_object__close(obj);
33 return 0;
34}
35#else
36static int test__bpf_parsing(void *obj_buf __maybe_unused,
37 size_t obj_buf_sz __maybe_unused)
38{
39 fprintf(stderr, " (skip bpf parsing)");
40 return 0;
41}
42#endif
43
44int test__llvm(void)
45{
46 char *tmpl_new, *clang_opt_new;
47 void *obj_buf;
48 size_t obj_buf_sz;
49 int err, old_verbose;
50
51 perf_config(perf_config_cb, NULL);
52
53 /*
54 * Skip this test if the user's .perfconfig doesn't set an [llvm] section,
55 * clang is not found in $PATH, and this is not 'perf test -v'
56 */
57 if (verbose == 0 && !llvm_param.user_set_param && llvm__search_clang()) {
58 fprintf(stderr, " (no clang, try 'perf test -v LLVM')");
59 return TEST_SKIP;
60 }
61
62 old_verbose = verbose;
63 /*
64 * llvm is verbose on error. Suppress all error output if
65 * not 'perf test -v'.
66 */
67 if (verbose == 0)
68 verbose = -1;
69
70 if (!llvm_param.clang_bpf_cmd_template)
71 return -1;
72
73 if (!llvm_param.clang_opt)
74 llvm_param.clang_opt = strdup("");
75
76 err = asprintf(&tmpl_new, "echo '%s' | %s", test_bpf_prog,
77 llvm_param.clang_bpf_cmd_template);
78 if (err < 0)
79 return -1;
80 err = asprintf(&clang_opt_new, "-xc %s", llvm_param.clang_opt);
81 if (err < 0)
82 return -1;
83
84 llvm_param.clang_bpf_cmd_template = tmpl_new;
85 llvm_param.clang_opt = clang_opt_new;
86 err = llvm__compile_bpf("-", &obj_buf, &obj_buf_sz);
87
88 verbose = old_verbose;
89 if (err) {
90 if (!verbose)
91 fprintf(stderr, " (use -v to see error message)");
92 return -1;
93 }
94
95 err = test__bpf_parsing(obj_buf, obj_buf_sz);
96 free(obj_buf);
97 return err;
98}
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 729112f4cfaa..ba31c4bd441d 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -58,7 +58,8 @@ make_install_man := install-man
58make_install_html := install-html 58make_install_html := install-html
59make_install_info := install-info 59make_install_info := install-info
60make_install_pdf := install-pdf 60make_install_pdf := install-pdf
61make_install_prefix := install prefix=/tmp/krava 61make_install_prefix := install prefix=/tmp/krava
62make_install_prefix_slash := install prefix=/tmp/krava/
62make_static := LDFLAGS=-static 63make_static := LDFLAGS=-static
63 64
64# all the NO_* variable combined 65# all the NO_* variable combined
@@ -101,6 +102,7 @@ run += make_util_pmu_bison_o
101run += make_install 102run += make_install
102run += make_install_bin 103run += make_install_bin
103run += make_install_prefix 104run += make_install_prefix
105run += make_install_prefix_slash
104# FIXME 'install-*' commented out till they're fixed 106# FIXME 'install-*' commented out till they're fixed
105# run += make_install_doc 107# run += make_install_doc
106# run += make_install_man 108# run += make_install_man
@@ -175,11 +177,14 @@ test_make_install_O := $(call test_dest_files,$(installed_files_all))
175test_make_install_bin := $(call test_dest_files,$(installed_files_bin)) 177test_make_install_bin := $(call test_dest_files,$(installed_files_bin))
176test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin)) 178test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin))
177 179
178# We prefix all installed files for make_install_prefix 180# We prefix all installed files for make_install_prefix(_slash)
179# with '/tmp/krava' to match installed/prefix-ed files. 181# with '/tmp/krava' to match installed/prefix-ed files.
180installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all)) 182installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all))
181test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix)) 183test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix))
182test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix)) 184test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix))
185
186test_make_install_prefix_slash := $(test_make_install_prefix)
187test_make_install_prefix_slash_O := $(test_make_install_prefix_O)
183 188
184# FIXME nothing gets installed 189# FIXME nothing gets installed
185test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1 190test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index d76963f7ad3d..9b6b2b6324a1 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -82,8 +82,12 @@ static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist)
82 TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); 82 TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
83 TEST_ASSERT_VAL("wrong config", 83 TEST_ASSERT_VAL("wrong config",
84 PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); 84 PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
85 /*
86 * The period value gets configured within perf_evlist__config,
87 * while this test executes only parse events method.
88 */
85 TEST_ASSERT_VAL("wrong period", 89 TEST_ASSERT_VAL("wrong period",
86 100000 == evsel->attr.sample_period); 90 0 == evsel->attr.sample_period);
87 TEST_ASSERT_VAL("wrong config1", 91 TEST_ASSERT_VAL("wrong config1",
88 0 == evsel->attr.config1); 92 0 == evsel->attr.config1);
89 TEST_ASSERT_VAL("wrong config2", 93 TEST_ASSERT_VAL("wrong config2",
@@ -406,7 +410,11 @@ static int test__checkevent_pmu(struct perf_evlist *evlist)
406 TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config); 410 TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config);
407 TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1); 411 TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1);
408 TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2); 412 TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2);
409 TEST_ASSERT_VAL("wrong period", 1000 == evsel->attr.sample_period); 413 /*
414 * The period value gets configured within perf_evlist__config,
415 * while this test executes only parse events method.
416 */
417 TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
410 418
411 return 0; 419 return 0;
412} 420}
@@ -471,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
471 return 0; 479 return 0;
472} 480}
473 481
482static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist)
483{
484 struct perf_evsel *evsel = perf_evlist__first(evlist);
485
486 /* cpu/config=1,call-graph=fp,time,period=100000/ */
487 TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
488 TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
489 TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
490 /*
491 * The period, time and callgraph value gets configured
492 * within perf_evlist__config,
493 * while this test executes only parse events method.
494 */
495 TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
496 TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
497 TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
498
499 /* cpu/config=2,call-graph=no,time=0,period=2000/ */
500 evsel = perf_evsel__next(evsel);
501 TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
502 TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config);
503 /*
504 * The period, time and callgraph value gets configured
505 * within perf_evlist__config,
506 * while this test executes only parse events method.
507 */
508 TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
509 TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
510 TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
511
512 return 0;
513}
514
474static int test__checkevent_pmu_events(struct perf_evlist *evlist) 515static int test__checkevent_pmu_events(struct perf_evlist *evlist)
475{ 516{
476 struct perf_evsel *evsel = perf_evlist__first(evlist); 517 struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1547,6 +1588,11 @@ static struct evlist_test test__events_pmu[] = {
1547 .check = test__checkevent_pmu_name, 1588 .check = test__checkevent_pmu_name,
1548 .id = 1, 1589 .id = 1,
1549 }, 1590 },
1591 {
1592 .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
1593 .check = test__checkevent_pmu_partial_time_callgraph,
1594 .id = 2,
1595 },
1550}; 1596};
1551 1597
1552struct terms_test { 1598struct terms_test {
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index ebb47d96bc0b..bf113a247987 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -62,6 +62,7 @@ int test__fdarray__filter(void);
62int test__fdarray__add(void); 62int test__fdarray__add(void);
63int test__kmod_path__parse(void); 63int test__kmod_path__parse(void);
64int test__thread_map(void); 64int test__thread_map(void);
65int test__llvm(void);
65 66
66#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) 67#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
67#ifdef HAVE_DWARF_UNWIND_SUPPORT 68#ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index 5acf000939ea..138a0e3431fa 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -20,6 +20,8 @@ int test__thread_map(void)
20 TEST_ASSERT_VAL("wrong comm", 20 TEST_ASSERT_VAL("wrong comm",
21 thread_map__comm(map, 0) && 21 thread_map__comm(map, 0) &&
22 !strcmp(thread_map__comm(map, 0), "perf")); 22 !strcmp(thread_map__comm(map, 0), "perf"));
23 TEST_ASSERT_VAL("wrong refcnt",
24 atomic_read(&map->refcnt) == 1);
23 thread_map__put(map); 25 thread_map__put(map);
24 26
25 /* test dummy pid */ 27 /* test dummy pid */
@@ -33,6 +35,8 @@ int test__thread_map(void)
33 TEST_ASSERT_VAL("wrong comm", 35 TEST_ASSERT_VAL("wrong comm",
34 thread_map__comm(map, 0) && 36 thread_map__comm(map, 0) &&
35 !strcmp(thread_map__comm(map, 0), "dummy")); 37 !strcmp(thread_map__comm(map, 0), "dummy"));
38 TEST_ASSERT_VAL("wrong refcnt",
39 atomic_read(&map->refcnt) == 1);
36 thread_map__put(map); 40 thread_map__put(map);
37 return 0; 41 return 0;
38} 42}
diff --git a/tools/perf/trace/strace/groups/file b/tools/perf/trace/strace/groups/file
new file mode 100644
index 000000000000..62378a899d79
--- /dev/null
+++ b/tools/perf/trace/strace/groups/file
@@ -0,0 +1,18 @@
1access
2chmod
3creat
4execve
5faccessat
6getcwd
7lstat
8mkdir
9open
10openat
11quotactl
12readlink
13rename
14rmdir
15stat
16statfs
17symlink
18unlink
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 6680fa5cb9dd..c6c7e5189214 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -46,6 +46,21 @@ void ui_browser__gotorc(struct ui_browser *browser, int y, int x)
46 SLsmg_gotorc(browser->y + y, browser->x + x); 46 SLsmg_gotorc(browser->y + y, browser->x + x);
47} 47}
48 48
49void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg,
50 unsigned int width)
51{
52 slsmg_write_nstring(msg, width);
53}
54
55void ui_browser__printf(struct ui_browser *browser __maybe_unused, const char *fmt, ...)
56{
57 va_list args;
58
59 va_start(args, fmt);
60 slsmg_vprintf(fmt, args);
61 va_end(args);
62}
63
49static struct list_head * 64static struct list_head *
50ui_browser__list_head_filter_entries(struct ui_browser *browser, 65ui_browser__list_head_filter_entries(struct ui_browser *browser,
51 struct list_head *pos) 66 struct list_head *pos)
@@ -234,7 +249,7 @@ void __ui_browser__show_title(struct ui_browser *browser, const char *title)
234{ 249{
235 SLsmg_gotorc(0, 0); 250 SLsmg_gotorc(0, 0);
236 ui_browser__set_color(browser, HE_COLORSET_ROOT); 251 ui_browser__set_color(browser, HE_COLORSET_ROOT);
237 slsmg_write_nstring(title, browser->width + 1); 252 ui_browser__write_nstring(browser, title, browser->width + 1);
238} 253}
239 254
240void ui_browser__show_title(struct ui_browser *browser, const char *title) 255void ui_browser__show_title(struct ui_browser *browser, const char *title)
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index 92ae72113965..f3cef564de02 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -37,6 +37,9 @@ void ui_browser__refresh_dimensions(struct ui_browser *browser);
37void ui_browser__reset_index(struct ui_browser *browser); 37void ui_browser__reset_index(struct ui_browser *browser);
38 38
39void ui_browser__gotorc(struct ui_browser *browser, int y, int x); 39void ui_browser__gotorc(struct ui_browser *browser, int y, int x);
40void ui_browser__write_nstring(struct ui_browser *browser, const char *msg,
41 unsigned int width);
42void ui_browser__printf(struct ui_browser *browser, const char *fmt, ...);
40void ui_browser__write_graph(struct ui_browser *browser, int graph); 43void ui_browser__write_graph(struct ui_browser *browser, int graph);
41void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, 44void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
42 u64 start, u64 end); 45 u64 start, u64 end);
@@ -58,8 +61,8 @@ int ui_browser__help_window(struct ui_browser *browser, const char *text);
58bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text); 61bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text);
59int ui_browser__input_window(const char *title, const char *text, char *input, 62int ui_browser__input_window(const char *title, const char *text, char *input,
60 const char *exit_msg, int delay_sec); 63 const char *exit_msg, int delay_sec);
61struct perf_session_env; 64struct perf_env;
62int tui__header_window(struct perf_session_env *env); 65int tui__header_window(struct perf_env *env);
63 66
64void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence); 67void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence);
65unsigned int ui_browser__argv_refresh(struct ui_browser *browser); 68unsigned int ui_browser__argv_refresh(struct ui_browser *browser);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 5995a8bd7c69..29739b347599 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1,7 +1,6 @@
1#include "../../util/util.h" 1#include "../../util/util.h"
2#include "../browser.h" 2#include "../browser.h"
3#include "../helpline.h" 3#include "../helpline.h"
4#include "../libslang.h"
5#include "../ui.h" 4#include "../ui.h"
6#include "../util.h" 5#include "../util.h"
7#include "../../util/annotate.h" 6#include "../../util/annotate.h"
@@ -16,6 +15,9 @@ struct disasm_line_samples {
16 u64 nr; 15 u64 nr;
17}; 16};
18 17
18#define IPC_WIDTH 6
19#define CYCLES_WIDTH 6
20
19struct browser_disasm_line { 21struct browser_disasm_line {
20 struct rb_node rb_node; 22 struct rb_node rb_node;
21 u32 idx; 23 u32 idx;
@@ -53,6 +55,7 @@ struct annotate_browser {
53 int max_jump_sources; 55 int max_jump_sources;
54 int nr_jumps; 56 int nr_jumps;
55 bool searching_backwards; 57 bool searching_backwards;
58 bool have_cycles;
56 u8 addr_width; 59 u8 addr_width;
57 u8 jumps_width; 60 u8 jumps_width;
58 u8 target_width; 61 u8 target_width;
@@ -96,6 +99,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br
96 return ui_browser__set_color(&browser->b, color); 99 return ui_browser__set_color(&browser->b, color);
97} 100}
98 101
102static int annotate_browser__pcnt_width(struct annotate_browser *ab)
103{
104 int w = 7 * ab->nr_events;
105
106 if (ab->have_cycles)
107 w += IPC_WIDTH + CYCLES_WIDTH;
108 return w;
109}
110
99static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) 111static void annotate_browser__write(struct ui_browser *browser, void *entry, int row)
100{ 112{
101 struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); 113 struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
@@ -106,7 +118,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
106 (!current_entry || (browser->use_navkeypressed && 118 (!current_entry || (browser->use_navkeypressed &&
107 !browser->navkeypressed))); 119 !browser->navkeypressed)));
108 int width = browser->width, printed; 120 int width = browser->width, printed;
109 int i, pcnt_width = 7 * ab->nr_events; 121 int i, pcnt_width = annotate_browser__pcnt_width(ab);
110 double percent_max = 0.0; 122 double percent_max = 0.0;
111 char bf[256]; 123 char bf[256];
112 124
@@ -116,19 +128,36 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
116 } 128 }
117 129
118 if (dl->offset != -1 && percent_max != 0.0) { 130 if (dl->offset != -1 && percent_max != 0.0) {
119 for (i = 0; i < ab->nr_events; i++) { 131 if (percent_max != 0.0) {
120 ui_browser__set_percent_color(browser, 132 for (i = 0; i < ab->nr_events; i++) {
121 bdl->samples[i].percent, 133 ui_browser__set_percent_color(browser,
122 current_entry); 134 bdl->samples[i].percent,
123 if (annotate_browser__opts.show_total_period) 135 current_entry);
124 slsmg_printf("%6" PRIu64 " ", 136 if (annotate_browser__opts.show_total_period) {
125 bdl->samples[i].nr); 137 ui_browser__printf(browser, "%6" PRIu64 " ",
126 else 138 bdl->samples[i].nr);
127 slsmg_printf("%6.2f ", bdl->samples[i].percent); 139 } else {
140 ui_browser__printf(browser, "%6.2f ",
141 bdl->samples[i].percent);
142 }
143 }
144 } else {
145 ui_browser__write_nstring(browser, " ", 7 * ab->nr_events);
128 } 146 }
129 } else { 147 } else {
130 ui_browser__set_percent_color(browser, 0, current_entry); 148 ui_browser__set_percent_color(browser, 0, current_entry);
131 slsmg_write_nstring(" ", pcnt_width); 149 ui_browser__write_nstring(browser, " ", 7 * ab->nr_events);
150 }
151 if (ab->have_cycles) {
152 if (dl->ipc)
153 ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc);
154 else
155 ui_browser__write_nstring(browser, " ", IPC_WIDTH);
156 if (dl->cycles)
157 ui_browser__printf(browser, "%*" PRIu64 " ",
158 CYCLES_WIDTH - 1, dl->cycles);
159 else
160 ui_browser__write_nstring(browser, " ", CYCLES_WIDTH);
132 } 161 }
133 162
134 SLsmg_write_char(' '); 163 SLsmg_write_char(' ');
@@ -138,7 +167,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
138 width += 1; 167 width += 1;
139 168
140 if (!*dl->line) 169 if (!*dl->line)
141 slsmg_write_nstring(" ", width - pcnt_width); 170 ui_browser__write_nstring(browser, " ", width - pcnt_width);
142 else if (dl->offset == -1) { 171 else if (dl->offset == -1) {
143 if (dl->line_nr && annotate_browser__opts.show_linenr) 172 if (dl->line_nr && annotate_browser__opts.show_linenr)
144 printed = scnprintf(bf, sizeof(bf), "%-*d ", 173 printed = scnprintf(bf, sizeof(bf), "%-*d ",
@@ -146,8 +175,8 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
146 else 175 else
147 printed = scnprintf(bf, sizeof(bf), "%*s ", 176 printed = scnprintf(bf, sizeof(bf), "%*s ",
148 ab->addr_width, " "); 177 ab->addr_width, " ");
149 slsmg_write_nstring(bf, printed); 178 ui_browser__write_nstring(browser, bf, printed);
150 slsmg_write_nstring(dl->line, width - printed - pcnt_width + 1); 179 ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width + 1);
151 } else { 180 } else {
152 u64 addr = dl->offset; 181 u64 addr = dl->offset;
153 int color = -1; 182 int color = -1;
@@ -166,7 +195,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
166 bdl->jump_sources); 195 bdl->jump_sources);
167 prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources, 196 prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources,
168 current_entry); 197 current_entry);
169 slsmg_write_nstring(bf, printed); 198 ui_browser__write_nstring(browser, bf, printed);
170 ui_browser__set_color(browser, prev); 199 ui_browser__set_color(browser, prev);
171 } 200 }
172 201
@@ -180,7 +209,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
180 209
181 if (change_color) 210 if (change_color)
182 color = ui_browser__set_color(browser, HE_COLORSET_ADDR); 211 color = ui_browser__set_color(browser, HE_COLORSET_ADDR);
183 slsmg_write_nstring(bf, printed); 212 ui_browser__write_nstring(browser, bf, printed);
184 if (change_color) 213 if (change_color)
185 ui_browser__set_color(browser, color); 214 ui_browser__set_color(browser, color);
186 if (dl->ins && dl->ins->ops->scnprintf) { 215 if (dl->ins && dl->ins->ops->scnprintf) {
@@ -194,11 +223,11 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
194 ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); 223 ui_browser__write_graph(browser, SLSMG_RARROW_CHAR);
195 SLsmg_write_char(' '); 224 SLsmg_write_char(' ');
196 } else { 225 } else {
197 slsmg_write_nstring(" ", 2); 226 ui_browser__write_nstring(browser, " ", 2);
198 } 227 }
199 } else { 228 } else {
200 if (strcmp(dl->name, "retq")) { 229 if (strcmp(dl->name, "retq")) {
201 slsmg_write_nstring(" ", 2); 230 ui_browser__write_nstring(browser, " ", 2);
202 } else { 231 } else {
203 ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); 232 ui_browser__write_graph(browser, SLSMG_LARROW_CHAR);
204 SLsmg_write_char(' '); 233 SLsmg_write_char(' ');
@@ -206,7 +235,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
206 } 235 }
207 236
208 disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); 237 disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset);
209 slsmg_write_nstring(bf, width - pcnt_width - 3 - printed); 238 ui_browser__write_nstring(browser, bf, width - pcnt_width - 3 - printed);
210 } 239 }
211 240
212 if (current_entry) 241 if (current_entry)
@@ -231,7 +260,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
231 unsigned int from, to; 260 unsigned int from, to;
232 struct map_symbol *ms = ab->b.priv; 261 struct map_symbol *ms = ab->b.priv;
233 struct symbol *sym = ms->sym; 262 struct symbol *sym = ms->sym;
234 u8 pcnt_width = 7; 263 u8 pcnt_width = annotate_browser__pcnt_width(ab);
235 264
236 /* PLT symbols contain external offsets */ 265 /* PLT symbols contain external offsets */
237 if (strstr(sym->name, "@plt")) 266 if (strstr(sym->name, "@plt"))
@@ -255,8 +284,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
255 to = (u64)btarget->idx; 284 to = (u64)btarget->idx;
256 } 285 }
257 286
258 pcnt_width *= ab->nr_events;
259
260 ui_browser__set_color(browser, HE_COLORSET_CODE); 287 ui_browser__set_color(browser, HE_COLORSET_CODE);
261 __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, 288 __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
262 from, to); 289 from, to);
@@ -266,9 +293,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
266{ 293{
267 struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); 294 struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
268 int ret = ui_browser__list_head_refresh(browser); 295 int ret = ui_browser__list_head_refresh(browser);
269 int pcnt_width; 296 int pcnt_width = annotate_browser__pcnt_width(ab);
270
271 pcnt_width = 7 * ab->nr_events;
272 297
273 if (annotate_browser__opts.jump_arrows) 298 if (annotate_browser__opts.jump_arrows)
274 annotate_browser__draw_current_jump(browser); 299 annotate_browser__draw_current_jump(browser);
@@ -390,7 +415,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
390 max_percent = bpos->samples[i].percent; 415 max_percent = bpos->samples[i].percent;
391 } 416 }
392 417
393 if (max_percent < 0.01) { 418 if (max_percent < 0.01 && pos->ipc == 0) {
394 RB_CLEAR_NODE(&bpos->rb_node); 419 RB_CLEAR_NODE(&bpos->rb_node);
395 continue; 420 continue;
396 } 421 }
@@ -869,6 +894,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
869 return map_symbol__tui_annotate(&he->ms, evsel, hbt); 894 return map_symbol__tui_annotate(&he->ms, evsel, hbt);
870} 895}
871 896
897
898static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end)
899{
900 unsigned n_insn = 0;
901 u64 offset;
902
903 for (offset = start; offset <= end; offset++) {
904 if (browser->offsets[offset])
905 n_insn++;
906 }
907 return n_insn;
908}
909
910static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end,
911 struct cyc_hist *ch)
912{
913 unsigned n_insn;
914 u64 offset;
915
916 n_insn = count_insn(browser, start, end);
917 if (n_insn && ch->num && ch->cycles) {
918 float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
919
920 /* Hide data when there are too many overlaps. */
921 if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
922 return;
923
924 for (offset = start; offset <= end; offset++) {
925 struct disasm_line *dl = browser->offsets[offset];
926
927 if (dl)
928 dl->ipc = ipc;
929 }
930 }
931}
932
933/*
934 * This should probably be in util/annotate.c to share with the tty
935 * annotate, but right now we need the per byte offsets arrays,
936 * which are only here.
937 */
938static void annotate__compute_ipc(struct annotate_browser *browser, size_t size,
939 struct symbol *sym)
940{
941 u64 offset;
942 struct annotation *notes = symbol__annotation(sym);
943
944 if (!notes->src || !notes->src->cycles_hist)
945 return;
946
947 pthread_mutex_lock(&notes->lock);
948 for (offset = 0; offset < size; ++offset) {
949 struct cyc_hist *ch;
950
951 ch = &notes->src->cycles_hist[offset];
952 if (ch && ch->cycles) {
953 struct disasm_line *dl;
954
955 if (ch->have_start)
956 count_and_fill(browser, ch->start, offset, ch);
957 dl = browser->offsets[offset];
958 if (dl && ch->num_aggr)
959 dl->cycles = ch->cycles_aggr / ch->num_aggr;
960 browser->have_cycles = true;
961 }
962 }
963 pthread_mutex_unlock(&notes->lock);
964}
965
872static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, 966static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
873 size_t size) 967 size_t size)
874{ 968{
@@ -991,6 +1085,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
991 } 1085 }
992 1086
993 annotate_browser__mark_jump_targets(&browser, size); 1087 annotate_browser__mark_jump_targets(&browser, size);
1088 annotate__compute_ipc(&browser, size, sym);
994 1089
995 browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size); 1090 browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size);
996 browser.max_addr_width = hex_width(sym->end); 1091 browser.max_addr_width = hex_width(sym->end);
diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c
index e8278c558d4a..edbeaaf31ace 100644
--- a/tools/perf/ui/browsers/header.c
+++ b/tools/perf/ui/browsers/header.c
@@ -25,7 +25,7 @@ static void ui_browser__argv_write(struct ui_browser *browser,
25 ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : 25 ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
26 HE_COLORSET_NORMAL); 26 HE_COLORSET_NORMAL);
27 27
28 slsmg_write_nstring(str, browser->width); 28 ui_browser__write_nstring(browser, str, browser->width);
29} 29}
30 30
31static int list_menu__run(struct ui_browser *menu) 31static int list_menu__run(struct ui_browser *menu)
@@ -91,7 +91,7 @@ static int ui__list_menu(int argc, char * const argv[])
91 return list_menu__run(&menu); 91 return list_menu__run(&menu);
92} 92}
93 93
94int tui__header_window(struct perf_session_env *env) 94int tui__header_window(struct perf_env *env)
95{ 95{
96 int i, argc = 0; 96 int i, argc = 0;
97 char **argv; 97 char **argv;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index fa67613976a8..cf86f2d3a5e7 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1,5 +1,4 @@
1#include <stdio.h> 1#include <stdio.h>
2#include "../libslang.h"
3#include <stdlib.h> 2#include <stdlib.h>
4#include <string.h> 3#include <string.h>
5#include <linux/rbtree.h> 4#include <linux/rbtree.h>
@@ -27,7 +26,7 @@ struct hist_browser {
27 struct map_symbol *selection; 26 struct map_symbol *selection;
28 struct hist_browser_timer *hbt; 27 struct hist_browser_timer *hbt;
29 struct pstack *pstack; 28 struct pstack *pstack;
30 struct perf_session_env *env; 29 struct perf_env *env;
31 int print_seq; 30 int print_seq;
32 bool show_dso; 31 bool show_dso;
33 bool show_headers; 32 bool show_headers;
@@ -540,10 +539,10 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser,
540 539
541 ui_browser__set_color(&browser->b, color); 540 ui_browser__set_color(&browser->b, color);
542 hist_browser__gotorc(browser, row, 0); 541 hist_browser__gotorc(browser, row, 0);
543 slsmg_write_nstring(" ", offset); 542 ui_browser__write_nstring(&browser->b, " ", offset);
544 slsmg_printf("%c", folded_sign); 543 ui_browser__printf(&browser->b, "%c", folded_sign);
545 ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' '); 544 ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' ');
546 slsmg_write_nstring(str, width); 545 ui_browser__write_nstring(&browser->b, str, width);
547} 546}
548 547
549static void hist_browser__fprintf_callchain_entry(struct hist_browser *b __maybe_unused, 548static void hist_browser__fprintf_callchain_entry(struct hist_browser *b __maybe_unused,
@@ -680,7 +679,7 @@ static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...)
680 ui_browser__set_percent_color(arg->b, percent, arg->current_entry); 679 ui_browser__set_percent_color(arg->b, percent, arg->current_entry);
681 680
682 ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent); 681 ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent);
683 slsmg_printf("%s", hpp->buf); 682 ui_browser__printf(arg->b, "%s", hpp->buf);
684 683
685 advance_hpp(hpp, ret); 684 advance_hpp(hpp, ret);
686 return ret; 685 return ret;
@@ -713,10 +712,11 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt, \
713 struct hist_entry *he) \ 712 struct hist_entry *he) \
714{ \ 713{ \
715 if (!symbol_conf.cumulate_callchain) { \ 714 if (!symbol_conf.cumulate_callchain) { \
715 struct hpp_arg *arg = hpp->ptr; \
716 int len = fmt->user_len ?: fmt->len; \ 716 int len = fmt->user_len ?: fmt->len; \
717 int ret = scnprintf(hpp->buf, hpp->size, \ 717 int ret = scnprintf(hpp->buf, hpp->size, \
718 "%*s", len, "N/A"); \ 718 "%*s", len, "N/A"); \
719 slsmg_printf("%s", hpp->buf); \ 719 ui_browser__printf(arg->b, "%s", hpp->buf); \
720 \ 720 \
721 return ret; \ 721 return ret; \
722 } \ 722 } \
@@ -801,12 +801,12 @@ static int hist_browser__show_entry(struct hist_browser *browser,
801 801
802 if (first) { 802 if (first) {
803 if (symbol_conf.use_callchain) { 803 if (symbol_conf.use_callchain) {
804 slsmg_printf("%c ", folded_sign); 804 ui_browser__printf(&browser->b, "%c ", folded_sign);
805 width -= 2; 805 width -= 2;
806 } 806 }
807 first = false; 807 first = false;
808 } else { 808 } else {
809 slsmg_printf(" "); 809 ui_browser__printf(&browser->b, " ");
810 width -= 2; 810 width -= 2;
 	}

@@ -814,7 +814,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 			width -= fmt->color(fmt, &hpp, entry);
 		} else {
 			width -= fmt->entry(fmt, &hpp, entry);
-			slsmg_printf("%s", s);
+			ui_browser__printf(&browser->b, "%s", s);
 		}
 	}

@@ -822,7 +822,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 		if (!browser->b.navkeypressed)
 			width += 1;

-		slsmg_write_nstring("", width);
+		ui_browser__write_nstring(&browser->b, "", width);

 		++row;
 		++printed;
@@ -899,7 +899,7 @@ static void hist_browser__show_headers(struct hist_browser *browser)
 	hists__scnprintf_headers(headers, sizeof(headers), browser->hists);
 	ui_browser__gotorc(&browser->b, 0, 0);
 	ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
-	slsmg_write_nstring(headers, browser->b.width + 1);
+	ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
 }

 static void ui_browser__hists_init_top(struct ui_browser *browser)
@@ -1214,7 +1214,7 @@ static int hist_browser__dump(struct hist_browser *browser)

 static struct hist_browser *hist_browser__new(struct hists *hists,
 					      struct hist_browser_timer *hbt,
-					      struct perf_session_env *env)
+					      struct perf_env *env)
 {
 	struct hist_browser *browser = zalloc(sizeof(*browser));

@@ -1267,6 +1267,8 @@ static int hists__browser_title(struct hists *hists,
 	const char *ev_name = perf_evsel__name(evsel);
 	char buf[512];
 	size_t buflen = sizeof(buf);
+	char ref[30] = " show reference callgraph, ";
+	bool enable_ref = false;

 	if (symbol_conf.filter_relative) {
 		nr_samples = hists->stats.nr_non_filtered_samples;
@@ -1292,10 +1294,13 @@ static int hists__browser_title(struct hists *hists,
 		}
 	}

+	if (symbol_conf.show_ref_callgraph &&
+	    strstr(ev_name, "call-graph=no"))
+		enable_ref = true;
 	nr_samples = convert_unit(nr_samples, &unit);
 	printed = scnprintf(bf, size,
-			    "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64,
-			    nr_samples, unit, ev_name, nr_events);
+			    "Samples: %lu%c of event '%s',%sEvent count (approx.): %" PRIu64,
+			    nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events);


 	if (hists->uid_filter_str)
@@ -1690,7 +1695,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				    bool left_exits,
 				    struct hist_browser_timer *hbt,
 				    float min_pcnt,
-				    struct perf_session_env *env)
+				    struct perf_env *env)
 {
 	struct hists *hists = evsel__hists(evsel);
 	struct hist_browser *browser = hist_browser__new(hists, hbt, env);
@@ -1868,6 +1873,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 		case K_RIGHT:
 			/* menu */
 			break;
+		case K_ESC:
 		case K_LEFT: {
 			const void *top;

@@ -1877,6 +1883,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				 */
 				if (left_exits)
 					goto out_free_stack;
+
+				if (key == K_ESC &&
+				    ui_browser__dialog_yesno(&browser->b,
+							     "Do you really want to exit?"))
+					goto out_free_stack;
+
 				continue;
 			}
 			top = pstack__peek(browser->pstack);
@@ -1892,12 +1904,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				do_zoom_thread(browser, actions);
 			continue;
 		}
-		case K_ESC:
-			if (!left_exits &&
-			    !ui_browser__dialog_yesno(&browser->b,
-						      "Do you really want to exit?"))
-				continue;
-			/* Fall thru */
 		case 'q':
 		case CTRL('c'):
 			goto out_free_stack;
@@ -2010,7 +2016,7 @@ struct perf_evsel_menu {
 	struct perf_evsel *selection;
 	bool lost_events, lost_events_warned;
 	float min_pcnt;
-	struct perf_session_env *env;
+	struct perf_env *env;
 };

 static void perf_evsel_menu__write(struct ui_browser *browser,
@@ -2044,7 +2050,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
 	nr_events = convert_unit(nr_events, &unit);
 	printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
 			    unit, unit == ' ' ? "" : " ", ev_name);
-	slsmg_printf("%s", bf);
+	ui_browser__printf(browser, "%s", bf);

 	nr_events = hists->stats.nr_events[PERF_RECORD_LOST];
 	if (nr_events != 0) {
@@ -2057,7 +2063,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
 		warn = bf;
 	}

-	slsmg_write_nstring(warn, browser->width - printed);
+	ui_browser__write_nstring(browser, warn, browser->width - printed);

 	if (current_entry)
 		menu->selection = evsel;
@@ -2120,15 +2126,11 @@ browse_hists:
 			else
 				pos = perf_evsel__prev(pos);
 			goto browse_hists;
-		case K_ESC:
-			if (!ui_browser__dialog_yesno(&menu->b,
-						      "Do you really want to exit?"))
-				continue;
-			/* Fall thru */
 		case K_SWITCH_INPUT_DATA:
 		case 'q':
 		case CTRL('c'):
 			goto out;
+		case K_ESC:
 		default:
 			continue;
 		}
@@ -2167,7 +2169,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 					   int nr_entries, const char *help,
 					   struct hist_browser_timer *hbt,
 					   float min_pcnt,
-					   struct perf_session_env *env)
+					   struct perf_env *env)
 {
 	struct perf_evsel *pos;
 	struct perf_evsel_menu menu = {
@@ -2200,7 +2202,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  struct hist_browser_timer *hbt,
 				  float min_pcnt,
-				  struct perf_session_env *env)
+				  struct perf_env *env)
 {
 	int nr_entries = evlist->nr_entries;

diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index b11639f33682..8c154c7d4669 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -1,4 +1,3 @@
-#include "../libslang.h"
 #include <elf.h>
 #include <inttypes.h>
 #include <sys/ttydefaults.h>
@@ -26,13 +25,13 @@ static void map_browser__write(struct ui_browser *browser, void *nd, int row)
 	int width;

 	ui_browser__set_percent_color(browser, 0, current_entry);
-	slsmg_printf("%*" PRIx64 " %*" PRIx64 " %c ",
+	ui_browser__printf(browser, "%*" PRIx64 " %*" PRIx64 " %c ",
 		     mb->addrlen, sym->start, mb->addrlen, sym->end,
 		     sym->binding == STB_GLOBAL ? 'g' :
 		     sym->binding == STB_LOCAL  ? 'l' : 'w');
 	width = browser->width - ((mb->addrlen * 2) + 4);
 	if (width > 0)
-		slsmg_write_nstring(sym->name, width);
+		ui_browser__write_nstring(browser, sym->name, width);
 }

 /* FIXME uber-kludgy, see comment on cmd_report... */
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c
index 402d2bd30b09..e13b48d705ef 100644
--- a/tools/perf/ui/browsers/scripts.c
+++ b/tools/perf/ui/browsers/scripts.c
@@ -81,7 +81,7 @@ static void script_browser__write(struct ui_browser *browser,
 	ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
 						       HE_COLORSET_NORMAL);

-	slsmg_write_nstring(sline->line, browser->width);
+	ui_browser__write_nstring(browser, sline->line, browser->width);
 }

 static int script_browser__run(struct perf_script_browser *browser)
diff --git a/tools/perf/ui/libslang.h b/tools/perf/ui/libslang.h
index 4d54b6450f5b..db816695ad97 100644
--- a/tools/perf/ui/libslang.h
+++ b/tools/perf/ui/libslang.h
@@ -14,12 +14,15 @@
 #if SLANG_VERSION < 20104
 #define slsmg_printf(msg, args...) \
 	SLsmg_printf((char *)(msg), ##args)
+#define slsmg_vprintf(msg, vargs) \
+	SLsmg_vprintf((char *)(msg), vargs)
 #define slsmg_write_nstring(msg, len) \
 	SLsmg_write_nstring((char *)(msg), len)
 #define sltt_set_color(obj, name, fg, bg) \
 	SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg))
 #else
 #define slsmg_printf SLsmg_printf
+#define slsmg_vprintf SLsmg_vprintf
 #define slsmg_write_nstring SLsmg_write_nstring
 #define sltt_set_color SLtt_set_color
 #endif
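
[Note] The new slsmg_vprintf macro gives the TUI a va_list-capable slang primitive, which is what lets formatted output funnel through a single browser-aware helper instead of scattered slsmg_printf() calls, as the hists/map/scripts browser conversions above show. A minimal sketch of such a helper follows; it is illustrative only, since the in-tree ui_browser__printf lives in ui/browser.c (not in this section) and also tracks browser state:

    #include <stdarg.h>

    struct ui_browser;                       /* opaque in this sketch */

    static void browser_printf(struct ui_browser *browser, const char *fmt, ...)
    {
            va_list args;

            (void)browser;                   /* the real helper uses it for positioning */
            va_start(args, fmt);
            slsmg_vprintf(fmt, args);        /* forward the va_list to SLsmg */
            va_end(args);
    }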
diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c
index c61d14b101e0..c4b99008e2c9 100644
--- a/tools/perf/ui/tui/progress.c
+++ b/tools/perf/ui/tui/progress.c
@@ -33,9 +33,26 @@ static void tui_progress__update(struct ui_progress *p)
 	pthread_mutex_unlock(&ui__lock);
 }

+static void tui_progress__finish(void)
+{
+	int y;
+
+	if (use_browser <= 0)
+		return;
+
+	ui__refresh_dimensions(false);
+	pthread_mutex_lock(&ui__lock);
+	y = SLtt_Screen_Rows / 2 - 2;
+	SLsmg_set_color(0);
+	SLsmg_fill_region(y, 0, 3, SLtt_Screen_Cols, ' ');
+	SLsmg_refresh();
+	pthread_mutex_unlock(&ui__lock);
+}
+
 static struct ui_progress_ops tui_progress__ops =
 {
 	.update = tui_progress__update,
+	.finish = tui_progress__finish,
 };

 void tui_progress__init(void)
diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c
index bf890f72fe80..d96ad7c8325d 100644
--- a/tools/perf/ui/tui/util.c
+++ b/tools/perf/ui/tui/util.c
@@ -21,7 +21,7 @@ static void ui_browser__argv_write(struct ui_browser *browser,
 	ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
 						       HE_COLORSET_NORMAL);
-	slsmg_write_nstring(*arg, browser->width);
+	ui_browser__write_nstring(browser, *arg, browser->width);
 }

 static int popup_menu__run(struct ui_browser *menu)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index d2d318c59b37..e912856cc4e5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -14,6 +14,7 @@ libperf-y += find_next_bit.o
 libperf-y += help.o
 libperf-y += kallsyms.o
 libperf-y += levenshtein.o
+libperf-y += llvm-utils.o
 libperf-y += parse-options.o
 libperf-y += parse-events.o
 libperf-y += path.o
@@ -67,6 +68,7 @@ libperf-y += target.o
 libperf-y += rblist.o
 libperf-y += intlist.o
 libperf-y += vdso.o
+libperf-y += counts.o
 libperf-y += stat.o
 libperf-y += stat-shadow.o
 libperf-y += record.o
@@ -76,9 +78,13 @@ libperf-$(CONFIG_X86) += tsc.o
 libperf-y += cloexec.o
 libperf-y += thread-stack.o
 libperf-$(CONFIG_AUXTRACE) += auxtrace.o
+libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o
+libperf-$(CONFIG_AUXTRACE) += intel-bts.o
 libperf-y += parse-branch-options.o

 libperf-$(CONFIG_LIBELF) += symbol-elf.o
+libperf-$(CONFIG_LIBELF) += probe-file.o
 libperf-$(CONFIG_LIBELF) += probe-event.o

 ifndef CONFIG_LIBELF
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 03b7bc70eb66..d1eece70b84d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym)
 	return 0;
 }

+/* The cycles histogram is lazily allocated. */
+static int symbol__alloc_hist_cycles(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	const size_t size = symbol__size(sym);
+
+	notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
+	if (notes->src->cycles_hist == NULL)
+		return -1;
+	return 0;
+}
+
 void symbol__annotate_zero_histograms(struct symbol *sym)
 {
 	struct annotation *notes = symbol__annotation(sym);

 	pthread_mutex_lock(&notes->lock);
-	if (notes->src != NULL)
+	if (notes->src != NULL) {
 		memset(notes->src->histograms, 0,
 		       notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+		if (notes->src->cycles_hist)
+			memset(notes->src->cycles_hist, 0,
+			       symbol__size(sym) * sizeof(struct cyc_hist));
+	}
 	pthread_mutex_unlock(&notes->lock);
 }

+static int __symbol__account_cycles(struct annotation *notes,
+				    u64 start,
+				    unsigned offset, unsigned cycles,
+				    unsigned have_start)
+{
+	struct cyc_hist *ch;
+
+	ch = notes->src->cycles_hist;
+	/*
+	 * For now we can only account one basic block per
+	 * final jump. But multiple could be overlapping.
+	 * Always account the longest one. So when
+	 * a shorter one has been already seen throw it away.
+	 *
+	 * We separately always account the full cycles.
+	 */
+	ch[offset].num_aggr++;
+	ch[offset].cycles_aggr += cycles;
+
+	if (!have_start && ch[offset].have_start)
+		return 0;
+	if (ch[offset].num) {
+		if (have_start && (!ch[offset].have_start ||
+				   ch[offset].start > start)) {
+			ch[offset].have_start = 0;
+			ch[offset].cycles = 0;
+			ch[offset].num = 0;
+			if (ch[offset].reset < 0xffff)
+				ch[offset].reset++;
+		} else if (have_start &&
+			   ch[offset].start < start)
+			return 0;
+	}
+	ch[offset].have_start = have_start;
+	ch[offset].start = start;
+	ch[offset].cycles += cycles;
+	ch[offset].num++;
+	return 0;
+}
+
 static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 				      struct annotation *notes, int evidx, u64 addr)
 {
@@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 	return 0;
 }

-static struct annotation *symbol__get_annotation(struct symbol *sym)
+static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles)
 {
 	struct annotation *notes = symbol__annotation(sym);

@@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym)
 		if (symbol__alloc_hist(sym) < 0)
 			return NULL;
 	}
+	if (!notes->src->cycles_hist && cycles) {
+		if (symbol__alloc_hist_cycles(sym) < 0)
+			return NULL;
+	}
 	return notes;
 }

@@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,

 	if (sym == NULL)
 		return 0;
-	notes = symbol__get_annotation(sym);
+	notes = symbol__get_annotation(sym, false);
 	if (notes == NULL)
 		return -ENOMEM;
 	return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
 }

+static int symbol__account_cycles(u64 addr, u64 start,
+				  struct symbol *sym, unsigned cycles)
+{
+	struct annotation *notes;
+	unsigned offset;
+
+	if (sym == NULL)
+		return 0;
+	notes = symbol__get_annotation(sym, true);
+	if (notes == NULL)
+		return -ENOMEM;
+	if (addr < sym->start || addr >= sym->end)
+		return -ERANGE;
+
+	if (start) {
+		if (start < sym->start || start >= sym->end)
+			return -ERANGE;
+		if (start >= addr)
+			start = 0;
+	}
+	offset = addr - sym->start;
+	return __symbol__account_cycles(notes,
+					start ? start - sym->start : 0,
+					offset, cycles,
+					!!start);
+}
+
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+				    struct addr_map_symbol *start,
+				    unsigned cycles)
+{
+	u64 saddr = 0;
+	int err;
+
+	if (!cycles)
+		return 0;
+
+	/*
+	 * Only set start when IPC can be computed. We can only
+	 * compute it when the basic block is completely in a single
+	 * function.
+	 * Special case the case when the jump is elsewhere, but
+	 * it starts on the function start.
+	 */
+	if (start &&
+	    (start->sym == ams->sym ||
+	     (ams->sym &&
+	      start->addr == ams->sym->start + ams->map->start)))
+		saddr = start->al_addr;
+	if (saddr == 0)
+		pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n",
+			  ams->addr,
+			  start ? start->addr : 0,
+			  ams->sym ? ams->sym->start + ams->map->start : 0,
+			  saddr);
+	err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
+	if (err)
+		pr_debug2("account_cycles failed %d\n", err);
+	return err;
+}
+
 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
 {
 	return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr);
@@ -1005,6 +1126,7 @@ fallback:
 		dso->annotate_warned = 1;
 		pr_err("Can't annotate %s:\n\n"
 		       "No vmlinux file%s\nwas found in the path.\n\n"
+		       "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
 		       "Please use:\n\n"
 		       "  perf buildid-cache -vu vmlinux\n\n"
 		       "or:\n\n"
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 7e78e6c27078..e9996092a093 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -59,6 +59,8 @@ struct disasm_line {
 	char		    *name;
 	struct ins	    *ins;
 	int		    line_nr;
+	float		    ipc;
+	u64		    cycles;
 	struct ins_operands ops;
 };

@@ -79,6 +81,17 @@ struct sym_hist {
 	u64		addr[0];
 };

+struct cyc_hist {
+	u64	start;
+	u64	cycles;
+	u64	cycles_aggr;
+	u32	num;
+	u32	num_aggr;
+	u8	have_start;
+	/* 1 byte padding */
+	u16	reset;
+};
+
 struct source_line_samples {
 	double		percent;
 	double		percent_sum;
@@ -97,6 +110,7 @@ struct source_line {
  * @histogram: Array of addr hit histograms per event being monitored
  * @lines: If 'print_lines' is specified, per source code line percentages
  * @source: source parsed from a disassembler like objdump -dS
+ * @cyc_hist: Average cycles per basic block
  *
  * lines is allocated, percentages calculated and all sorted by percentage
 * when the annotation is about to be presented, so the percentages are for
@@ -109,6 +123,7 @@ struct annotated_source {
 	struct source_line	*lines;
 	int			nr_histograms;
 	int			sizeof_sym_hist;
+	struct cyc_hist		*cycles_hist;
 	struct sym_hist		histograms[0];
 };

@@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)

 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);

+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+				    struct addr_map_symbol *start,
+				    unsigned cycles);
+
 int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);

 int symbol__alloc_hist(struct symbol *sym);
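
[Note] struct cyc_hist keeps two sets of counters per branch-target offset: cycles/num describe the longest basic block whose start address is known (restarted when a longer block shows up, with 'reset' counting such restarts), while cycles_aggr/num_aggr accumulate every observation unconditionally, as __symbol__account_cycles() above implements. A consumer that only wants the robust average can therefore ignore the start tracking; an illustrative helper (hypothetical name, not part of this patch):

    static double cyc_hist__avg_cycles(const struct cyc_hist *ch)
    {
            if (ch->num_aggr == 0)
                    return 0.0;
            return (double)ch->cycles_aggr / ch->num_aggr;
    }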
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 83d9dd96fe08..a980e7c50ee0 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -47,6 +47,9 @@
 #include "debug.h"
 #include "parse-options.h"

+#include "intel-pt.h"
+#include "intel-bts.h"
+
 int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
 			struct auxtrace_mmap_params *mp,
 			void *userpg, int fd)
@@ -876,7 +879,7 @@ static bool auxtrace__dont_decode(struct perf_session *session)

 int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
 				      union perf_event *event,
-				      struct perf_session *session __maybe_unused)
+				      struct perf_session *session)
 {
 	enum auxtrace_type type = event->auxtrace_info.type;

@@ -884,6 +887,10 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
 		fprintf(stdout, " type: %u\n", type);

 	switch (type) {
+	case PERF_AUXTRACE_INTEL_PT:
+		return intel_pt_process_auxtrace_info(event, session);
+	case PERF_AUXTRACE_INTEL_BTS:
+		return intel_bts_process_auxtrace_info(event, session);
 	case PERF_AUXTRACE_UNKNOWN:
 	default:
 		return -EINVAL;
@@ -942,6 +949,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 	struct itrace_synth_opts *synth_opts = opt->value;
 	const char *p;
 	char *endptr;
+	bool period_type_set = false;

 	synth_opts->set = true;

@@ -970,10 +978,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 			case 'i':
 				synth_opts->period_type =
 					PERF_ITRACE_PERIOD_INSTRUCTIONS;
+				period_type_set = true;
 				break;
 			case 't':
 				synth_opts->period_type =
 					PERF_ITRACE_PERIOD_TICKS;
+				period_type_set = true;
 				break;
 			case 'm':
 				synth_opts->period *= 1000;
@@ -986,6 +996,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 					goto out_err;
 				synth_opts->period_type =
 					PERF_ITRACE_PERIOD_NANOSECS;
+				period_type_set = true;
 				break;
 			case '\0':
 				goto out;
@@ -1039,7 +1050,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 	}
 out:
 	if (synth_opts->instructions) {
-		if (!synth_opts->period_type)
+		if (!period_type_set)
 			synth_opts->period_type =
 				PERF_ITRACE_DEFAULT_PERIOD_TYPE;
 		if (!synth_opts->period)
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 471aecbc4d68..bf72b77a588a 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -39,6 +39,8 @@ struct events_stats;

 enum auxtrace_type {
 	PERF_AUXTRACE_UNKNOWN,
+	PERF_AUXTRACE_INTEL_PT,
+	PERF_AUXTRACE_INTEL_BTS,
 };

 enum itrace_period_type {
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 1f6fc2323ef9..d909459fb54c 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -93,6 +93,38 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf)
 	return raw - build_id;
 }

+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
+{
+	char notes[PATH_MAX];
+	u8 build_id[BUILD_ID_SIZE];
+	int ret;
+
+	if (!root_dir)
+		root_dir = "";
+
+	scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
+
+	ret = sysfs__read_build_id(notes, build_id, sizeof(build_id));
+	if (ret < 0)
+		return ret;
+
+	return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+}
+
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id)
+{
+	u8 build_id[BUILD_ID_SIZE];
+	int ret;
+
+	ret = filename__read_build_id(pathname, build_id, sizeof(build_id));
+	if (ret < 0)
+		return ret;
+	else if (ret != sizeof(build_id))
+		return -EINVAL;
+
+	return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+}
+
 /* asnprintf consolidates asprintf and snprintf */
 static int asnprintf(char **strp, size_t size, const char *fmt, ...)
 {
@@ -124,7 +156,7 @@ static char *build_id__filename(const char *sbuild_id, char *bf, size_t size)

 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
 {
-	char build_id_hex[BUILD_ID_SIZE * 2 + 1];
+	char build_id_hex[SBUILD_ID_SIZE];

 	if (!dso->has_build_id)
 		return NULL;
@@ -291,7 +323,7 @@ int build_id_cache__list_build_ids(const char *pathname,
 	struct dirent *d;
 	int ret = 0;

-	list = strlist__new(true, NULL);
+	list = strlist__new(NULL, NULL);
 	dir_name = build_id_cache__dirname_from_path(pathname, false, false);
 	if (!list || !dir_name) {
 		ret = -ENOMEM;
@@ -384,7 +416,7 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
 				 const char *name, bool is_kallsyms,
 				 bool is_vdso)
 {
-	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+	char sbuild_id[SBUILD_ID_SIZE];

 	build_id__sprintf(build_id, build_id_size, sbuild_id);

diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 85011222cc14..27a14a8a945b 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -1,7 +1,8 @@
 #ifndef PERF_BUILD_ID_H_
 #define PERF_BUILD_ID_H_ 1

 #define BUILD_ID_SIZE 20
+#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1)

 #include "tool.h"
 #include "strlist.h"
@@ -11,6 +12,9 @@ extern struct perf_tool build_id__mark_dso_hit_ops;
 struct dso;

 int build_id__sprintf(const u8 *build_id, int len, char *bf);
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
+
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);

 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
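
[Note] With SBUILD_ID_SIZE defined once (two hex digits per build-id byte plus the terminating NUL), callers no longer repeat the "BUILD_ID_SIZE * 2 + 1" arithmetic. A hypothetical caller of the new helper, with an arbitrary example path:

    char sbuild_id[SBUILD_ID_SIZE];

    if (filename__sprintf_build_id("/bin/ls", sbuild_id) > 0)
            printf("build id: %s\n", sbuild_id);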
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9f643ee77001..773fe13ce627 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -25,96 +25,9 @@

 __thread struct callchain_cursor callchain_cursor;

-#ifdef HAVE_DWARF_UNWIND_SUPPORT
-static int get_stack_size(const char *str, unsigned long *_size)
-{
-	char *endptr;
-	unsigned long size;
-	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
-
-	size = strtoul(str, &endptr, 0);
-
-	do {
-		if (*endptr)
-			break;
-
-		size = round_up(size, sizeof(u64));
-		if (!size || size > max_size)
-			break;
-
-		*_size = size;
-		return 0;
-
-	} while (0);
-
-	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
-	       max_size, str);
-	return -1;
-}
-#endif /* HAVE_DWARF_UNWIND_SUPPORT */
-
-int parse_callchain_record_opt(const char *arg)
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
 {
-	char *tok, *name, *saveptr = NULL;
-	char *buf;
-	int ret = -1;
-
-	/* We need buffer that we know we can write to. */
-	buf = malloc(strlen(arg) + 1);
-	if (!buf)
-		return -ENOMEM;
-
-	strcpy(buf, arg);
-
-	tok = strtok_r((char *)buf, ",", &saveptr);
-	name = tok ? : (char *)buf;
-
-	do {
-		/* Framepointer style */
-		if (!strncmp(name, "fp", sizeof("fp"))) {
-			if (!strtok_r(NULL, ",", &saveptr)) {
-				callchain_param.record_mode = CALLCHAIN_FP;
-				ret = 0;
-			} else
-				pr_err("callchain: No more arguments "
-				       "needed for --call-graph fp\n");
-			break;
-
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
-		/* Dwarf style */
-		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
-			const unsigned long default_stack_dump_size = 8192;
-
-			ret = 0;
-			callchain_param.record_mode = CALLCHAIN_DWARF;
-			callchain_param.dump_size = default_stack_dump_size;
-
-			tok = strtok_r(NULL, ",", &saveptr);
-			if (tok) {
-				unsigned long size = 0;
-
-				ret = get_stack_size(tok, &size);
-				callchain_param.dump_size = size;
-			}
-#endif /* HAVE_DWARF_UNWIND_SUPPORT */
-		} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
-			if (!strtok_r(NULL, ",", &saveptr)) {
-				callchain_param.record_mode = CALLCHAIN_LBR;
-				ret = 0;
-			} else
-				pr_err("callchain: No more arguments "
-				       "needed for --call-graph lbr\n");
-			break;
-		} else {
-			pr_err("callchain: Unknown --call-graph option "
-			       "value: %s\n", arg);
-			break;
-		}
-
-	} while (0);
-
-	free(buf);
-	return ret;
+	return parse_callchain_record(arg, param);
 }

 static int parse_callchain_mode(const char *value)
@@ -219,7 +132,7 @@ int perf_callchain_config(const char *var, const char *value)
 	var += sizeof("call-graph.") - 1;

 	if (!strcmp(var, "record-mode"))
-		return parse_callchain_record_opt(value);
+		return parse_callchain_record_opt(value, &callchain_param);
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 	if (!strcmp(var, "dump-size")) {
 		unsigned long size = 0;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 679c2c6d8ade..acee2b3cd801 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -177,7 +177,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
 			bool hide_unresolved);

 extern const char record_callchain_help[];
-int parse_callchain_record_opt(const char *arg);
+extern int parse_callchain_record(const char *arg, struct callchain_param *param);
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
 int parse_callchain_report_opt(const char *arg);
 int perf_callchain_config(const char *var, const char *value);

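[Note] Threading a struct callchain_param pointer through parse_callchain_record_opt(), instead of letting the parser write the callchain_param global directly, means callers can parse into any instance. A sketch that seeds a local copy from the global defaults ("dwarf,8192" being a record mode plus stack dump size, per the option syntax handled by the parser above):

    struct callchain_param param = callchain_param; /* start from global defaults */

    if (parse_callchain_record_opt("dwarf,8192", &param) < 0)
            pr_err("invalid --call-graph argument\n");
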
diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h
index 68888c29b04a..3bee6773ddb0 100644
--- a/tools/perf/util/cloexec.h
+++ b/tools/perf/util/cloexec.h
@@ -4,7 +4,7 @@
 unsigned long perf_event_open_cloexec_flag(void);

 #ifdef __GLIBC_PREREQ
-#if !__GLIBC_PREREQ(2, 6)
+#if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__)
 extern int sched_getcpu(void) __THROW;
 #endif
 #endif
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index 55355b3d4f85..9b9565416f90 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -67,8 +67,9 @@ static int __color_vsnprintf(char *bf, size_t size, const char *color,
 	return r;
 }

+/* Colors are not included in return value */
 static int __color_vfprintf(FILE *fp, const char *color, const char *fmt,
-			    va_list args, const char *trail)
+			    va_list args)
 {
 	int r = 0;

@@ -83,12 +84,10 @@ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt,
 	}

 	if (perf_use_color_default && *color)
-		r += fprintf(fp, "%s", color);
+		fprintf(fp, "%s", color);
 	r += vfprintf(fp, fmt, args);
 	if (perf_use_color_default && *color)
-		r += fprintf(fp, "%s", PERF_COLOR_RESET);
-	if (trail)
-		r += fprintf(fp, "%s", trail);
+		fprintf(fp, "%s", PERF_COLOR_RESET);
 	return r;
 }

@@ -100,7 +99,7 @@ int color_vsnprintf(char *bf, size_t size, const char *color,

 int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args)
 {
-	return __color_vfprintf(fp, color, fmt, args, NULL);
+	return __color_vfprintf(fp, color, fmt, args);
 }

 int color_snprintf(char *bf, size_t size, const char *color,
@@ -126,16 +125,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
 	return r;
 }

-int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...)
-{
-	va_list args;
-	int r;
-	va_start(args, fmt);
-	r = __color_vfprintf(fp, color, fmt, args, "\n");
-	va_end(args);
-	return r;
-}
-
 /*
  * This function splits the buffer by newlines and colors the lines individually.
  *
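
[Note] The new comment on __color_vfprintf() spells out the behavior change: the ANSI escapes emitted for 'color' no longer count toward the return value, so color_fprintf() now reports the same width that a plain fprintf() of the payload would, which is what makes return-value-based column padding safe. A sketch (the 'name' variable is an assumed input):

    int printed = color_fprintf(stdout, PERF_COLOR_GREEN, "%s", name);

    fprintf(stdout, "%*s", 24 - printed, " ");  /* pad out to a 24-column field */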
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 38146f922c54..a93997f16dec 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -35,7 +35,6 @@ int color_vsnprintf(char *bf, size_t size, const char *color,
 int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args);
 int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
 int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...);
-int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
 int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
 int value_color_snprintf(char *bf, size_t size, const char *fmt, double value);
 int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index e18f653cd7db..2e452ac1353d 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -12,6 +12,7 @@
 #include "cache.h"
 #include "exec_cmd.h"
 #include "util/hist.h"  /* perf_hist_config */
+#include "util/llvm-utils.h"   /* perf_llvm_config */

 #define MAXNAME (256)

@@ -408,6 +409,9 @@ int perf_default_config(const char *var, const char *value,
 	if (!prefixcmp(var, "call-graph."))
 		return perf_callchain_config(var, value);

+	if (!prefixcmp(var, "llvm."))
+		return perf_llvm_config(var, value);
+
 	/* Add other config variables here. */
 	return 0;
 }
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
new file mode 100644
index 000000000000..e3fde313deb2
--- /dev/null
+++ b/tools/perf/util/counts.c
@@ -0,0 +1,52 @@
+#include <stdlib.h>
+#include "evsel.h"
+#include "counts.h"
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads)
+{
+	struct perf_counts *counts = zalloc(sizeof(*counts));
+
+	if (counts) {
+		struct xyarray *values;
+
+		values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values));
+		if (!values) {
+			free(counts);
+			return NULL;
+		}
+
+		counts->values = values;
+	}
+
+	return counts;
+}
+
+void perf_counts__delete(struct perf_counts *counts)
+{
+	if (counts) {
+		xyarray__delete(counts->values);
+		free(counts);
+	}
+}
+
+static void perf_counts__reset(struct perf_counts *counts)
+{
+	xyarray__reset(counts->values);
+}
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel)
+{
+	perf_counts__reset(evsel->counts);
+}
+
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	evsel->counts = perf_counts__new(ncpus, nthreads);
+	return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evsel__free_counts(struct perf_evsel *evsel)
+{
+	perf_counts__delete(evsel->counts);
+	evsel->counts = NULL;
+}
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
new file mode 100644
index 000000000000..34d8baaf558a
--- /dev/null
+++ b/tools/perf/util/counts.h
@@ -0,0 +1,37 @@
+#ifndef __PERF_COUNTS_H
+#define __PERF_COUNTS_H
+
+#include "xyarray.h"
+
+struct perf_counts_values {
+	union {
+		struct {
+			u64 val;
+			u64 ena;
+			u64 run;
+		};
+		u64 values[3];
+	};
+};
+
+struct perf_counts {
+	s8			  scaled;
+	struct perf_counts_values aggr;
+	struct xyarray		  *values;
+};
+
+
+static inline struct perf_counts_values*
+perf_counts(struct perf_counts *counts, int cpu, int thread)
+{
+	return xyarray__entry(counts->values, cpu, thread);
+}
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads);
+void perf_counts__delete(struct perf_counts *counts);
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads);
+void perf_evsel__free_counts(struct perf_evsel *evsel);
+
+#endif /* __PERF_COUNTS_H */
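
[Note] The counts abstraction stores one perf_counts_values triple per (cpu, thread) cell of an xyarray, with perf_counts() as the accessor. A short usage sketch ('ncpus' and 'nthreads' are assumed inputs):

    struct perf_counts *counts = perf_counts__new(ncpus, nthreads);

    if (counts != NULL) {
            struct perf_counts_values *v = perf_counts(counts, 0, 0);

            v->val = 1234;  /* val/ena/run mirror the read_format triple */
            perf_counts__delete(counts);
    }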
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 2da5581ec74d..86d9c7302598 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
 	return ret;
 }

+int veprintf(int level, int var, const char *fmt, va_list args)
+{
+	return _eprintf(level, var, fmt, args);
+}
+
 int eprintf(int level, int var, const char *fmt, ...)
 {
 	va_list args;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index caac2fdc6105..8b9a088c32ab 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...);

 int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
 int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
+int veprintf(int level, int var, const char *fmt, va_list args);

 int perf_debug_option(const char *str);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 2fe98bb0e95b..fc8db9c764ac 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -137,6 +137,10 @@ struct dso {
 	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
 	struct rb_root	 symbols[MAP__NR_TYPES];
 	struct rb_root	 symbol_names[MAP__NR_TYPES];
+	struct {
+		u64		addr;
+		struct symbol	*symbol;
+	} last_find_result[MAP__NR_TYPES];
 	void		 *a2l;
 	char		 *symsrc_filename;
 	unsigned int	 a2l_fails;
@@ -320,6 +324,8 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
 struct dso *dsos__findnew(struct dsos *dsos, const char *name);
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits);

+void dso__reset_find_symbol_cache(struct dso *dso);
+
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
 			       bool (skip)(struct dso *dso, int parm), int parm);
 size_t __dsos__fprintf(struct list_head *head, FILE *fp);
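
[Note] The new last_find_result[] field memoizes the most recent symbol lookup per map type, and dso__reset_find_symbol_cache() is declared so that symbol loading/removal can invalidate it. The lookup side is not part of this hunk; an illustrative (not in-tree) version, assuming the rb-tree walker symbols__find() from util/symbol.c, would be:

    static struct symbol *dso__find_symbol_cached(struct dso *dso,
                                                  enum map_type type, u64 addr)
    {
            /* repeated lookups of one address skip the rb-tree walk */
            if (dso->last_find_result[type].addr != addr) {
                    dso->last_find_result[type].addr = addr;
                    dso->last_find_result[type].symbol =
                            symbols__find(&dso->symbols[type], addr);
            }
            return dso->last_find_result[type].symbol;
    }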
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 57f3ef41c2bc..a509aa8433a1 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -734,15 +734,18 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
 	Dwarf_Lines *lines;
 	Dwarf_Line *line;
 	Dwarf_Addr addr;
-	const char *fname;
+	const char *fname, *decf = NULL;
 	int lineno, ret = 0;
+	int decl = 0, inl;
 	Dwarf_Die die_mem, *cu_die;
 	size_t nlines, i;

 	/* Get the CU die */
-	if (dwarf_tag(rt_die) != DW_TAG_compile_unit)
+	if (dwarf_tag(rt_die) != DW_TAG_compile_unit) {
 		cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
-	else
+		dwarf_decl_line(rt_die, &decl);
+		decf = dwarf_decl_file(rt_die);
+	} else
 		cu_die = rt_die;
 	if (!cu_die) {
 		pr_debug2("Failed to get CU from given DIE.\n");
@@ -767,15 +770,21 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
 			continue;
 		}
 		/* Filter lines based on address */
-		if (rt_die != cu_die)
+		if (rt_die != cu_die) {
 			/*
 			 * Address filtering
 			 * The line is included in given function, and
 			 * no inline block includes it.
 			 */
-			if (!dwarf_haspc(rt_die, addr) ||
-			    die_find_inlinefunc(rt_die, addr, &die_mem))
+			if (!dwarf_haspc(rt_die, addr))
 				continue;
+			if (die_find_inlinefunc(rt_die, addr, &die_mem)) {
+				dwarf_decl_line(&die_mem, &inl);
+				if (inl != decl ||
+				    decf != dwarf_decl_file(&die_mem))
+					continue;
+			}
+		}
 		/* Get source line */
 		fname = dwarf_linesrc(line, NULL, NULL);

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 67a977e5d0ab..7ff61274ed57 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -26,6 +26,8 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_AUX]			= "AUX",
 	[PERF_RECORD_ITRACE_START]		= "ITRACE_START",
 	[PERF_RECORD_LOST_SAMPLES]		= "LOST_SAMPLES",
+	[PERF_RECORD_SWITCH]			= "SWITCH",
+	[PERF_RECORD_SWITCH_CPU_WIDE]		= "SWITCH_CPU_WIDE",
 	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
 	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
 	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
@@ -749,6 +751,14 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
 	return machine__process_lost_samples_event(machine, event, sample);
 }

+int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine)
+{
+	return machine__process_switch_event(machine, event);
+}
+
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
 {
 	return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
@@ -827,6 +837,20 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
 		       event->itrace_start.pid, event->itrace_start.tid);
 }

+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
+{
+	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+	const char *in_out = out ? "OUT" : "IN ";
+
+	if (event->header.type == PERF_RECORD_SWITCH)
+		return fprintf(fp, " %s\n", in_out);
+
+	return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n",
+		       in_out, out ? "next" : "prev",
+		       event->context_switch.next_prev_pid,
+		       event->context_switch.next_prev_tid);
+}
+
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 {
 	size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -852,6 +876,10 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 	case PERF_RECORD_ITRACE_START:
 		ret += perf_event__fprintf_itrace_start(event, fp);
 		break;
+	case PERF_RECORD_SWITCH:
+	case PERF_RECORD_SWITCH_CPU_WIDE:
+		ret += perf_event__fprintf_switch(event, fp);
+		break;
 	default:
 		ret += fprintf(fp, "\n");
 	}
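
[Note] On the consuming side, a tool callback can tell schedule-in from schedule-out via the PERF_RECORD_MISC_SWITCH_OUT bit, exactly as perf_event__fprintf_switch() decodes it above. A sketch of such a callback (hypothetical; real tools simply wire perf_event__process_switch() into their struct perf_tool):

    static int process_switch_event(struct perf_tool *tool __maybe_unused,
                                    union perf_event *event,
                                    struct perf_sample *sample __maybe_unused,
                                    struct machine *machine __maybe_unused)
    {
            bool sched_out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;

            if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
                    pr_debug("switch %s pid/tid %u/%u\n", sched_out ? "out" : "in",
                             event->context_switch.next_prev_pid,
                             event->context_switch.next_prev_tid);
            return 0;
    }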
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index c53f36384b64..f729df5e25e6 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -134,7 +134,8 @@ struct branch_flags {
 	u64 predicted:1;
 	u64 in_tx:1;
 	u64 abort:1;
-	u64 reserved:60;
+	u64 cycles:16;
+	u64 reserved:44;
 };

 struct branch_entry {
@@ -348,6 +349,12 @@ struct itrace_start_event {
 	u32 pid, tid;
 };

+struct context_switch_event {
+	struct perf_event_header header;
+	u32 next_prev_pid;
+	u32 next_prev_tid;
+};
+
 union perf_event {
 	struct perf_event_header header;
 	struct mmap_event mmap;
@@ -369,6 +376,7 @@ union perf_event {
 	struct auxtrace_error_event auxtrace_error;
 	struct aux_event aux;
 	struct itrace_start_event itrace_start;
+	struct context_switch_event context_switch;
 };

 void perf_event__print_totals(void);
@@ -418,6 +426,10 @@ int perf_event__process_itrace_start(struct perf_tool *tool,
 				     union perf_event *event,
 				     struct perf_sample *sample,
 				     struct machine *machine);
+int perf_event__process_switch(struct perf_tool *tool,
+			       union perf_event *event,
+			       struct perf_sample *sample,
+			       struct machine *machine);
 int perf_event__process_mmap(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_sample *sample,
 			     struct machine *machine);
@@ -480,6 +492,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);

 u64 kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6cfdee68e763..8d00039d6a20 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -98,6 +98,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)

 	evlist__for_each_safe(evlist, n, pos) {
 		list_del_init(&pos->node);
+		pos->evlist = NULL;
 		perf_evsel__delete(pos);
 	}

@@ -125,6 +126,7 @@ void perf_evlist__delete(struct perf_evlist *evlist)

 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
 {
+	entry->evlist = evlist;
 	list_add_tail(&entry->node, &evlist->entries);
 	entry->idx = evlist->nr_entries;
 	entry->tracking = !entry->idx;
@@ -573,7 +575,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
 {
 	struct perf_sample_id *sid;

-	if (evlist->nr_entries == 1)
+	if (evlist->nr_entries == 1 || !id)
 		return perf_evlist__first(evlist);

 	sid = perf_evlist__id2sid(evlist, id);
@@ -1102,7 +1104,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 }

 static int perf_evlist__propagate_maps(struct perf_evlist *evlist,
-				       struct target *target)
+				       bool has_user_cpus)
 {
 	struct perf_evsel *evsel;

@@ -1111,15 +1113,16 @@ static int perf_evlist__propagate_maps(struct perf_evlist *evlist,
 		 * We already have cpus for evsel (via PMU sysfs) so
 		 * keep it, if there's no target cpu list defined.
 		 */
-		if (evsel->cpus && target->cpu_list)
+		if (evsel->cpus && has_user_cpus)
 			cpu_map__put(evsel->cpus);

-		if (!evsel->cpus || target->cpu_list)
+		if (!evsel->cpus || has_user_cpus)
 			evsel->cpus = cpu_map__get(evlist->cpus);

 		evsel->threads = thread_map__get(evlist->threads);

-		if (!evsel->cpus || !evsel->threads)
+		if ((evlist->cpus && !evsel->cpus) ||
+		    (evlist->threads && !evsel->threads))
 			return -ENOMEM;
 	}

@@ -1142,7 +1145,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
 	if (evlist->cpus == NULL)
 		goto out_delete_threads;

-	return perf_evlist__propagate_maps(evlist, target);
+	return perf_evlist__propagate_maps(evlist, !!target->cpu_list);

 out_delete_threads:
 	thread_map__put(evlist->threads);
@@ -1150,6 +1153,23 @@ out_delete_threads:
 	return -1;
 }

+int perf_evlist__set_maps(struct perf_evlist *evlist,
+			  struct cpu_map *cpus,
+			  struct thread_map *threads)
+{
+	if (evlist->cpus)
+		cpu_map__put(evlist->cpus);
+
+	evlist->cpus = cpus;
+
+	if (evlist->threads)
+		thread_map__put(evlist->threads);
+
+	evlist->threads = threads;
+
+	return perf_evlist__propagate_maps(evlist, false);
+}
+
 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
 {
 	struct perf_evsel *evsel;
@@ -1161,7 +1181,7 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e
 		if (evsel->filter == NULL)
 			continue;

-		err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter);
+		err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
 		if (err) {
 			*err_evsel = evsel;
 			break;
@@ -1175,11 +1195,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
 {
 	struct perf_evsel *evsel;
 	int err = 0;
-	const int ncpus = cpu_map__nr(evlist->cpus),
-		  nthreads = thread_map__nr(evlist->threads);

 	evlist__for_each(evlist, evsel) {
-		err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter);
+		err = perf_evsel__set_filter(evsel, filter);
 		if (err)
 			break;
 	}
@@ -1257,6 +1275,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
 	return __perf_evlist__combined_sample_type(evlist);
 }

+u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	u64 branch_type = 0;
+
+	evlist__for_each(evlist, evsel)
+		branch_type |= evsel->attr.branch_sample_type;
+	return branch_type;
+}
+
 bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
 {
 	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
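
[Note] Since perf_evlist__set_maps() is now a real function (see the evlist.h change below), assigning maps also drops references to the previous ones and propagates the new ones to every evsel in one step. A caller sketch, assuming 'evlist' came from perf_evlist__new():

    struct cpu_map *cpus = cpu_map__new(NULL);          /* NULL: all online CPUs */
    struct thread_map *threads = thread_map__new_dummy();

    if (!cpus || !threads || perf_evlist__set_maps(evlist, cpus, threads) < 0)
            pr_err("could not set evlist cpu/thread maps\n");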
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 037633c1da9d..b39a6198f4ac 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -56,6 +56,7 @@ struct perf_evlist {
56 struct cpu_map *cpus; 56 struct cpu_map *cpus;
57 struct perf_evsel *selected; 57 struct perf_evsel *selected;
58 struct events_stats stats; 58 struct events_stats stats;
59 struct perf_env *env;
59}; 60};
60 61
61struct perf_evsel_str_handler { 62struct perf_evsel_str_handler {
@@ -114,6 +115,8 @@ void perf_evlist__close(struct perf_evlist *evlist);
114 115
115void perf_evlist__set_id_pos(struct perf_evlist *evlist); 116void perf_evlist__set_id_pos(struct perf_evlist *evlist);
116bool perf_can_sample_identifier(void); 117bool perf_can_sample_identifier(void);
118bool perf_can_record_switch_events(void);
119bool perf_can_record_cpu_wide(void);
117void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); 120void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts);
118int record_opts__config(struct record_opts *opts); 121int record_opts__config(struct record_opts *opts);
119 122
@@ -152,14 +155,9 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
152void perf_evlist__set_selected(struct perf_evlist *evlist, 155void perf_evlist__set_selected(struct perf_evlist *evlist,
153 struct perf_evsel *evsel); 156 struct perf_evsel *evsel);
154 157
155static inline void perf_evlist__set_maps(struct perf_evlist *evlist, 158int perf_evlist__set_maps(struct perf_evlist *evlist,
156 struct cpu_map *cpus, 159 struct cpu_map *cpus,
157 struct thread_map *threads) 160 struct thread_map *threads);
158{
159 evlist->cpus = cpus;
160 evlist->threads = threads;
161}
162
163int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); 161int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target);
164int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); 162int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel);
165 163
@@ -169,6 +167,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist);
169u64 perf_evlist__read_format(struct perf_evlist *evlist); 167u64 perf_evlist__read_format(struct perf_evlist *evlist);
170u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist); 168u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
171u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); 169u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
170u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist);
172bool perf_evlist__sample_id_all(struct perf_evlist *evlist); 171bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
173u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); 172u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
174 173
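
The static inline perf_evlist__set_maps() removed here simply overwrote
evlist->cpus and evlist->threads, leaking whatever references the evlist
already held and leaving previously added evsels with stale maps. The
out-of-line replacement in evlist.c pairs each assignment with a put of
the old map and returns the propagation result, so callers can react to
failure (a sketch; the error handling is illustrative):

    if (perf_evlist__set_maps(evlist, cpus, threads) < 0)
            pr_err("failed to propagate cpu/thread maps\n");
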
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 2936b3080722..bac25f41a751 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -206,10 +206,13 @@ void perf_evsel__init(struct perf_evsel *evsel,
206 evsel->leader = evsel; 206 evsel->leader = evsel;
207 evsel->unit = ""; 207 evsel->unit = "";
208 evsel->scale = 1.0; 208 evsel->scale = 1.0;
209 evsel->evlist = NULL;
209 INIT_LIST_HEAD(&evsel->node); 210 INIT_LIST_HEAD(&evsel->node);
211 INIT_LIST_HEAD(&evsel->config_terms);
210 perf_evsel__object.init(evsel); 212 perf_evsel__object.init(evsel);
211 evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); 213 evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
212 perf_evsel__calc_id_pos(evsel); 214 perf_evsel__calc_id_pos(evsel);
215 evsel->cmdline_group_boundary = false;
213} 216}
214 217
215struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) 218struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
@@ -543,14 +546,15 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
543 546
544static void 547static void
545perf_evsel__config_callgraph(struct perf_evsel *evsel, 548perf_evsel__config_callgraph(struct perf_evsel *evsel,
546 struct record_opts *opts) 549 struct record_opts *opts,
550 struct callchain_param *param)
547{ 551{
548 bool function = perf_evsel__is_function_event(evsel); 552 bool function = perf_evsel__is_function_event(evsel);
549 struct perf_event_attr *attr = &evsel->attr; 553 struct perf_event_attr *attr = &evsel->attr;
550 554
551 perf_evsel__set_sample_bit(evsel, CALLCHAIN); 555 perf_evsel__set_sample_bit(evsel, CALLCHAIN);
552 556
553 if (callchain_param.record_mode == CALLCHAIN_LBR) { 557 if (param->record_mode == CALLCHAIN_LBR) {
554 if (!opts->branch_stack) { 558 if (!opts->branch_stack) {
555 if (attr->exclude_user) { 559 if (attr->exclude_user) {
556 pr_warning("LBR callstack option is only available " 560 pr_warning("LBR callstack option is only available "
@@ -566,12 +570,12 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
566 "Falling back to framepointers.\n"); 570 "Falling back to framepointers.\n");
567 } 571 }
568 572
569 if (callchain_param.record_mode == CALLCHAIN_DWARF) { 573 if (param->record_mode == CALLCHAIN_DWARF) {
570 if (!function) { 574 if (!function) {
571 perf_evsel__set_sample_bit(evsel, REGS_USER); 575 perf_evsel__set_sample_bit(evsel, REGS_USER);
572 perf_evsel__set_sample_bit(evsel, STACK_USER); 576 perf_evsel__set_sample_bit(evsel, STACK_USER);
573 attr->sample_regs_user = PERF_REGS_MASK; 577 attr->sample_regs_user = PERF_REGS_MASK;
574 attr->sample_stack_user = callchain_param.dump_size; 578 attr->sample_stack_user = param->dump_size;
575 attr->exclude_callchain_user = 1; 579 attr->exclude_callchain_user = 1;
576 } else { 580 } else {
577 pr_info("Cannot use DWARF unwind for function trace event," 581 pr_info("Cannot use DWARF unwind for function trace event,"
@@ -585,6 +589,97 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
585 } 589 }
586} 590}
587 591
592static void
593perf_evsel__reset_callgraph(struct perf_evsel *evsel,
594 struct callchain_param *param)
595{
596 struct perf_event_attr *attr = &evsel->attr;
597
598 perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
599 if (param->record_mode == CALLCHAIN_LBR) {
600 perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
601 attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
602 PERF_SAMPLE_BRANCH_CALL_STACK);
603 }
604 if (param->record_mode == CALLCHAIN_DWARF) {
605 perf_evsel__reset_sample_bit(evsel, REGS_USER);
606 perf_evsel__reset_sample_bit(evsel, STACK_USER);
607 }
608}
609
610static void apply_config_terms(struct perf_evsel *evsel,
611 struct record_opts *opts)
612{
613 struct perf_evsel_config_term *term;
614 struct list_head *config_terms = &evsel->config_terms;
615 struct perf_event_attr *attr = &evsel->attr;
616 struct callchain_param param;
617 u32 dump_size = 0;
618 char *callgraph_buf = NULL;
619
620 /* callgraph default */
 621 param = callchain_param; /* inherit the global defaults, including .enabled */
622
623 list_for_each_entry(term, config_terms, list) {
624 switch (term->type) {
625 case PERF_EVSEL__CONFIG_TERM_PERIOD:
626 attr->sample_period = term->val.period;
627 attr->freq = 0;
628 break;
629 case PERF_EVSEL__CONFIG_TERM_FREQ:
630 attr->sample_freq = term->val.freq;
631 attr->freq = 1;
632 break;
633 case PERF_EVSEL__CONFIG_TERM_TIME:
634 if (term->val.time)
635 perf_evsel__set_sample_bit(evsel, TIME);
636 else
637 perf_evsel__reset_sample_bit(evsel, TIME);
638 break;
639 case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
640 callgraph_buf = term->val.callgraph;
641 break;
642 case PERF_EVSEL__CONFIG_TERM_STACK_USER:
643 dump_size = term->val.stack_user;
644 break;
645 default:
646 break;
647 }
648 }
649
 650 /* The user explicitly set a per-event callgraph; clear the old setting and apply the new one. */
651 if ((callgraph_buf != NULL) || (dump_size > 0)) {
652
653 /* parse callgraph parameters */
654 if (callgraph_buf != NULL) {
655 if (!strcmp(callgraph_buf, "no")) {
656 param.enabled = false;
657 param.record_mode = CALLCHAIN_NONE;
658 } else {
659 param.enabled = true;
660 if (parse_callchain_record(callgraph_buf, &param)) {
661 pr_err("per-event callgraph setting for %s failed. "
662 "Apply callgraph global setting for it\n",
663 evsel->name);
664 return;
665 }
666 }
667 }
668 if (dump_size > 0) {
669 dump_size = round_up(dump_size, sizeof(u64));
670 param.dump_size = dump_size;
671 }
672
673 /* If global callgraph set, clear it */
674 if (callchain_param.enabled)
675 perf_evsel__reset_callgraph(evsel, &callchain_param);
676
677 /* set perf-event callgraph */
678 if (param.enabled)
679 perf_evsel__config_callgraph(evsel, opts, &param);
680 }
681}
682
588/* 683/*
589 * The enable_on_exec/disabled value strategy: 684 * The enable_on_exec/disabled value strategy:
590 * 685 *
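
apply_config_terms() is what makes per-event settings (the period,
frequency, time, callgraph and stack-size terms declared in evsel.h;
their exact event-syntax spellings are defined by the event parser) win
over the global record options: it runs at the end of
perf_evsel__config(), after the globals have been applied. For example,
a PERIOD term flips the event from frequency mode to a fixed period
(values illustrative):

    struct perf_event_attr *attr = &evsel->attr;

    attr->sample_period = 200000;   /* term->val.period */
    attr->freq = 0;                 /* leave frequency mode */

A per-event callgraph or stack-size term additionally undoes the global
callchain setup via perf_evsel__reset_callgraph() before applying its
own via perf_evsel__config_callgraph().
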
@@ -689,7 +784,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
689 evsel->attr.exclude_callchain_user = 1; 784 evsel->attr.exclude_callchain_user = 1;
690 785
691 if (callchain_param.enabled && !evsel->no_aux_samples) 786 if (callchain_param.enabled && !evsel->no_aux_samples)
692 perf_evsel__config_callgraph(evsel, opts); 787 perf_evsel__config_callgraph(evsel, opts, &callchain_param);
693 788
694 if (opts->sample_intr_regs) { 789 if (opts->sample_intr_regs) {
695 attr->sample_regs_intr = PERF_REGS_MASK; 790 attr->sample_regs_intr = PERF_REGS_MASK;
@@ -707,7 +802,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
707 */ 802 */
708 if (opts->sample_time && 803 if (opts->sample_time &&
709 (!perf_missing_features.sample_id_all && 804 (!perf_missing_features.sample_id_all &&
710 (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu))) 805 (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
806 opts->sample_time_set)))
711 perf_evsel__set_sample_bit(evsel, TIME); 807 perf_evsel__set_sample_bit(evsel, TIME);
712 808
713 if (opts->raw_samples && !evsel->no_aux_samples) { 809 if (opts->raw_samples && !evsel->no_aux_samples) {
@@ -736,6 +832,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
736 attr->mmap2 = track && !perf_missing_features.mmap2; 832 attr->mmap2 = track && !perf_missing_features.mmap2;
737 attr->comm = track; 833 attr->comm = track;
738 834
835 if (opts->record_switch_events)
836 attr->context_switch = track;
837
739 if (opts->sample_transaction) 838 if (opts->sample_transaction)
740 perf_evsel__set_sample_bit(evsel, TRANSACTION); 839 perf_evsel__set_sample_bit(evsel, TRANSACTION);
741 840
@@ -772,6 +871,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
772 attr->use_clockid = 1; 871 attr->use_clockid = 1;
773 attr->clockid = opts->clockid; 872 attr->clockid = opts->clockid;
774 } 873 }
874
875 /*
 876 * Apply event-specific term settings,
 877 * which override any global configuration.
878 */
879 apply_config_terms(evsel, opts);
775} 880}
776 881
777static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 882static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
@@ -815,14 +920,44 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthrea
815 return 0; 920 return 0;
816} 921}
817 922
818int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, 923int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
819 const char *filter) 924 const char *filter)
820{ 925{
821 return perf_evsel__run_ioctl(evsel, ncpus, nthreads, 926 return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
822 PERF_EVENT_IOC_SET_FILTER, 927 PERF_EVENT_IOC_SET_FILTER,
823 (void *)filter); 928 (void *)filter);
824} 929}
825 930
931int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter)
932{
933 char *new_filter = strdup(filter);
934
935 if (new_filter != NULL) {
936 free(evsel->filter);
937 evsel->filter = new_filter;
938 return 0;
939 }
940
941 return -1;
942}
943
944int perf_evsel__append_filter(struct perf_evsel *evsel,
945 const char *op, const char *filter)
946{
947 char *new_filter;
948
949 if (evsel->filter == NULL)
950 return perf_evsel__set_filter(evsel, filter);
951
 952 if (asprintf(&new_filter, "(%s) %s (%s)", evsel->filter, op, filter) > 0) {
953 free(evsel->filter);
954 evsel->filter = new_filter;
955 return 0;
956 }
957
958 return -1;
959}
960
826int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) 961int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads)
827{ 962{
828 return perf_evsel__run_ioctl(evsel, ncpus, nthreads, 963 return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
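
Setting and applying filters are now separate steps:
perf_evsel__set_filter() and perf_evsel__append_filter() only manipulate
the string stored on the evsel, while the PERF_EVENT_IOC_SET_FILTER
ioctl happens later, in perf_evsel__apply_filter() driven by
perf_evlist__apply_filters(). A sketch of composing a filter before any
event is even opened:

    perf_evsel__set_filter(evsel, "common_pid != 0");
    perf_evsel__append_filter(evsel, "&&", "id == 1");
    /* evsel->filter is now "(common_pid != 0) && (id == 1)" */
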
@@ -865,6 +1000,16 @@ static void perf_evsel__free_id(struct perf_evsel *evsel)
865 zfree(&evsel->id); 1000 zfree(&evsel->id);
866} 1001}
867 1002
1003static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
1004{
1005 struct perf_evsel_config_term *term, *h;
1006
1007 list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
1008 list_del(&term->list);
1009 free(term);
1010 }
1011}
1012
868void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 1013void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
869{ 1014{
870 int cpu, thread; 1015 int cpu, thread;
@@ -882,8 +1027,10 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
882void perf_evsel__exit(struct perf_evsel *evsel) 1027void perf_evsel__exit(struct perf_evsel *evsel)
883{ 1028{
884 assert(list_empty(&evsel->node)); 1029 assert(list_empty(&evsel->node));
1030 assert(evsel->evlist == NULL);
885 perf_evsel__free_fd(evsel); 1031 perf_evsel__free_fd(evsel);
886 perf_evsel__free_id(evsel); 1032 perf_evsel__free_id(evsel);
1033 perf_evsel__free_config_terms(evsel);
887 close_cgroup(evsel->cgrp); 1034 close_cgroup(evsel->cgrp);
888 cpu_map__put(evsel->cpus); 1035 cpu_map__put(evsel->cpus);
889 thread_map__put(evsel->threads); 1036 thread_map__put(evsel->threads);
@@ -1095,6 +1242,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
1095 PRINT_ATTRf(mmap2, p_unsigned); 1242 PRINT_ATTRf(mmap2, p_unsigned);
1096 PRINT_ATTRf(comm_exec, p_unsigned); 1243 PRINT_ATTRf(comm_exec, p_unsigned);
1097 PRINT_ATTRf(use_clockid, p_unsigned); 1244 PRINT_ATTRf(use_clockid, p_unsigned);
1245 PRINT_ATTRf(context_switch, p_unsigned);
1098 1246
1099 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); 1247 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
1100 PRINT_ATTRf(bp_type, p_unsigned); 1248 PRINT_ATTRf(bp_type, p_unsigned);
@@ -2075,8 +2223,13 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
2075 printed += perf_event_attr__fprintf(fp, &evsel->attr, 2223 printed += perf_event_attr__fprintf(fp, &evsel->attr,
2076 __print_attr__fprintf, &first); 2224 __print_attr__fprintf, &first);
2077 } else if (details->freq) { 2225 } else if (details->freq) {
2078 printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64, 2226 const char *term = "sample_freq";
2079 (u64)evsel->attr.sample_freq); 2227
2228 if (!evsel->attr.freq)
2229 term = "sample_period";
2230
2231 printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
2232 term, (u64)evsel->attr.sample_freq);
2080 } 2233 }
2081out: 2234out:
2082 fputc('\n', fp); 2235 fputc('\n', fp);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4a7ed5656cf0..298e6bbca200 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -9,7 +9,7 @@
9#include "xyarray.h" 9#include "xyarray.h"
10#include "symbol.h" 10#include "symbol.h"
11#include "cpumap.h" 11#include "cpumap.h"
12#include "stat.h" 12#include "counts.h"
13 13
14struct perf_evsel; 14struct perf_evsel;
15 15
@@ -31,8 +31,38 @@ struct perf_sample_id {
31 31
32struct cgroup_sel; 32struct cgroup_sel;
33 33
34/*
 35 * The 'struct perf_evsel_config_term' is used to pass event-
 36 * specific configuration data to the perf_evsel__config routine.
37 * It is allocated within event parsing and attached to
38 * perf_evsel::config_terms list head.
39*/
40enum {
41 PERF_EVSEL__CONFIG_TERM_PERIOD,
42 PERF_EVSEL__CONFIG_TERM_FREQ,
43 PERF_EVSEL__CONFIG_TERM_TIME,
44 PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
45 PERF_EVSEL__CONFIG_TERM_STACK_USER,
46 PERF_EVSEL__CONFIG_TERM_MAX,
47};
48
49struct perf_evsel_config_term {
50 struct list_head list;
51 int type;
52 union {
53 u64 period;
54 u64 freq;
55 bool time;
56 char *callgraph;
57 u64 stack_user;
58 } val;
59};
60
34/** struct perf_evsel - event selector 61/** struct perf_evsel - event selector
35 * 62 *
63 * @evlist - evlist this evsel is in, if it is in one.
64 * @node - To insert it into evlist->entries or in other list_heads, say in
65 * the event parsing routines.
36 * @name - Can be set to retain the original event name passed by the user, 66 * @name - Can be set to retain the original event name passed by the user,
37 * so that when showing results in tools such as 'perf stat', we 67 * so that when showing results in tools such as 'perf stat', we
38 * show the name used, not some alias. 68 * show the name used, not some alias.
@@ -46,6 +76,7 @@ struct cgroup_sel;
46 */ 76 */
47struct perf_evsel { 77struct perf_evsel {
48 struct list_head node; 78 struct list_head node;
79 struct perf_evlist *evlist;
49 struct perf_event_attr attr; 80 struct perf_event_attr attr;
50 char *filter; 81 char *filter;
51 struct xyarray *fd; 82 struct xyarray *fd;
@@ -86,6 +117,8 @@ struct perf_evsel {
86 unsigned long *per_pkg_mask; 117 unsigned long *per_pkg_mask;
87 struct perf_evsel *leader; 118 struct perf_evsel *leader;
88 char *group_name; 119 char *group_name;
120 bool cmdline_group_boundary;
121 struct list_head config_terms;
89}; 122};
90 123
91union u64_swap { 124union u64_swap {
@@ -182,8 +215,11 @@ void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
182void perf_evsel__set_sample_id(struct perf_evsel *evsel, 215void perf_evsel__set_sample_id(struct perf_evsel *evsel,
183 bool use_sample_identifier); 216 bool use_sample_identifier);
184 217
185int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, 218int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter);
186 const char *filter); 219int perf_evsel__append_filter(struct perf_evsel *evsel,
220 const char *op, const char *filter);
221int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
222 const char *filter);
187int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads); 223int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads);
188 224
189int perf_evsel__open_per_cpu(struct perf_evsel *evsel, 225int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
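
A sketch of how a parsed term would typically be queued on an evsel for
apply_config_terms() to consume later; the allocation site and the
values are illustrative, not part of this header:

    struct perf_evsel_config_term *term = zalloc(sizeof(*term));

    if (term) {
            term->type = PERF_EVSEL__CONFIG_TERM_STACK_USER;
            term->val.stack_user = 8192;    /* bytes of user stack to dump */
            list_add_tail(&term->list, &evsel->config_terms);
    }
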
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 03ace57a800c..41814547da15 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -923,17 +923,13 @@ static void print_cmdline(struct perf_header *ph, int fd __maybe_unused,
923 FILE *fp) 923 FILE *fp)
924{ 924{
925 int nr, i; 925 int nr, i;
926 char *str;
927 926
928 nr = ph->env.nr_cmdline; 927 nr = ph->env.nr_cmdline;
929 str = ph->env.cmdline;
930 928
931 fprintf(fp, "# cmdline : "); 929 fprintf(fp, "# cmdline : ");
932 930
933 for (i = 0; i < nr; i++) { 931 for (i = 0; i < nr; i++)
934 fprintf(fp, "%s ", str); 932 fprintf(fp, "%s ", ph->env.cmdline_argv[i]);
935 str += strlen(str) + 1;
936 }
937 fputc('\n', fp); 933 fputc('\n', fp);
938} 934}
939 935
@@ -1541,14 +1537,13 @@ process_event_desc(struct perf_file_section *section __maybe_unused,
1541 return 0; 1537 return 0;
1542} 1538}
1543 1539
1544static int process_cmdline(struct perf_file_section *section __maybe_unused, 1540static int process_cmdline(struct perf_file_section *section,
1545 struct perf_header *ph, int fd, 1541 struct perf_header *ph, int fd,
1546 void *data __maybe_unused) 1542 void *data __maybe_unused)
1547{ 1543{
1548 ssize_t ret; 1544 ssize_t ret;
1549 char *str; 1545 char *str, *cmdline = NULL, **argv = NULL;
1550 u32 nr, i; 1546 u32 nr, i, len = 0;
1551 struct strbuf sb;
1552 1547
1553 ret = readn(fd, &nr, sizeof(nr)); 1548 ret = readn(fd, &nr, sizeof(nr));
1554 if (ret != sizeof(nr)) 1549 if (ret != sizeof(nr))
@@ -1558,22 +1553,32 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused,
1558 nr = bswap_32(nr); 1553 nr = bswap_32(nr);
1559 1554
1560 ph->env.nr_cmdline = nr; 1555 ph->env.nr_cmdline = nr;
1561 strbuf_init(&sb, 128); 1556
1557 cmdline = zalloc(section->size + nr + 1);
1558 if (!cmdline)
1559 return -1;
1560
1561 argv = zalloc(sizeof(char *) * (nr + 1));
1562 if (!argv)
1563 goto error;
1562 1564
1563 for (i = 0; i < nr; i++) { 1565 for (i = 0; i < nr; i++) {
1564 str = do_read_string(fd, ph); 1566 str = do_read_string(fd, ph);
1565 if (!str) 1567 if (!str)
1566 goto error; 1568 goto error;
1567 1569
1568 /* include a NULL character at the end */ 1570 argv[i] = cmdline + len;
1569 strbuf_add(&sb, str, strlen(str) + 1); 1571 memcpy(argv[i], str, strlen(str) + 1);
1572 len += strlen(str) + 1;
1570 free(str); 1573 free(str);
1571 } 1574 }
1572 ph->env.cmdline = strbuf_detach(&sb, NULL); 1575 ph->env.cmdline = cmdline;
1576 ph->env.cmdline_argv = (const char **) argv;
1573 return 0; 1577 return 0;
1574 1578
1575error: 1579error:
1576 strbuf_release(&sb); 1580 free(argv);
1581 free(cmdline);
1577 return -1; 1582 return -1;
1578} 1583}
1579 1584
@@ -2509,6 +2514,7 @@ int perf_session__read_header(struct perf_session *session)
2509 if (session->evlist == NULL) 2514 if (session->evlist == NULL)
2510 return -ENOMEM; 2515 return -ENOMEM;
2511 2516
2517 session->evlist->env = &header->env;
2512 if (perf_data_file__is_pipe(file)) 2518 if (perf_data_file__is_pipe(file))
2513 return perf_header__read_pipe(session); 2519 return perf_header__read_pipe(session);
2514 2520
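
process_cmdline() now keeps the recorded command line in a single buffer
of NUL-separated strings and builds a NULL-terminated array of pointers
into it, so print_cmdline() no longer has to re-walk the buffer with
strlen(). The resulting layout for a recorded "perf record -a" would be
(sketch):

    /* cmdline:      "perf\0record\0-a\0"
     * cmdline_argv: { &cmdline[0], &cmdline[5], &cmdline[12], NULL }
     */
    for (i = 0; i < nr; i++)
            fprintf(fp, "%s ", ph->env.cmdline_argv[i]);
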
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d4d57962c591..396e4965f0c9 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -66,7 +66,7 @@ struct perf_header;
66int perf_file_header__read(struct perf_file_header *header, 66int perf_file_header__read(struct perf_file_header *header,
67 struct perf_header *ph, int fd); 67 struct perf_header *ph, int fd);
68 68
69struct perf_session_env { 69struct perf_env {
70 char *hostname; 70 char *hostname;
71 char *os_release; 71 char *os_release;
72 char *version; 72 char *version;
@@ -84,6 +84,7 @@ struct perf_session_env {
84 int nr_pmu_mappings; 84 int nr_pmu_mappings;
85 int nr_groups; 85 int nr_groups;
86 char *cmdline; 86 char *cmdline;
87 const char **cmdline_argv;
87 char *sibling_cores; 88 char *sibling_cores;
88 char *sibling_threads; 89 char *sibling_threads;
89 char *numa_nodes; 90 char *numa_nodes;
@@ -97,7 +98,7 @@ struct perf_header {
97 u64 data_size; 98 u64 data_size;
98 u64 feat_offset; 99 u64 feat_offset;
99 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); 100 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
100 struct perf_session_env env; 101 struct perf_env env;
101}; 102};
102 103
103struct perf_evlist; 104struct perf_evlist;
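
Since perf_session__read_header() also points the evlist at this
environment (see the header.c hunk above), report-side code holding only
an evlist can now reach the recorded environment, e.g. (sketch):

    if (evlist->env && evlist->env->cmdline_argv)
            printf("# recorded by: %s ...\n", evlist->env->cmdline_argv[0]);
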
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6f28d53d4e46..08b6cd945f1e 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -151,6 +151,12 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
151 hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); 151 hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
152 hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); 152 hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
153 153
154 if (h->srcline)
155 hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline));
156
157 if (h->srcfile)
158 hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile));
159
154 if (h->transaction) 160 if (h->transaction)
155 hists__new_col_len(hists, HISTC_TRANSACTION, 161 hists__new_col_len(hists, HISTC_TRANSACTION,
156 hist_entry__transaction_len()); 162 hist_entry__transaction_len());
@@ -618,7 +624,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
618 * and not events sampled. Thus we use a pseudo period of 1. 624 * and not events sampled. Thus we use a pseudo period of 1.
619 */ 625 */
620 he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, 626 he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
621 1, 1, 0, true); 627 1, bi->flags.cycles ? bi->flags.cycles : 1,
628 0, true);
622 if (he == NULL) 629 if (he == NULL)
623 return -ENOMEM; 630 return -ENOMEM;
624 631
@@ -760,6 +767,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
760 struct hist_entry **he_cache = iter->priv; 767 struct hist_entry **he_cache = iter->priv;
761 struct hist_entry *he; 768 struct hist_entry *he;
762 struct hist_entry he_tmp = { 769 struct hist_entry he_tmp = {
770 .hists = evsel__hists(evsel),
763 .cpu = al->cpu, 771 .cpu = al->cpu,
764 .thread = al->thread, 772 .thread = al->thread,
765 .comm = thread__comm(al->thread), 773 .comm = thread__comm(al->thread),
@@ -944,6 +952,8 @@ void hist_entry__delete(struct hist_entry *he)
944 952
945 zfree(&he->stat_acc); 953 zfree(&he->stat_acc);
946 free_srcline(he->srcline); 954 free_srcline(he->srcline);
955 if (he->srcfile && he->srcfile[0])
956 free(he->srcfile);
947 free_callchain(he->callchain); 957 free_callchain(he->callchain);
948 free(he); 958 free(he);
949} 959}
@@ -1099,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
1099 1109
1100static void __hists__insert_output_entry(struct rb_root *entries, 1110static void __hists__insert_output_entry(struct rb_root *entries,
1101 struct hist_entry *he, 1111 struct hist_entry *he,
1102 u64 min_callchain_hits) 1112 u64 min_callchain_hits,
1113 bool use_callchain)
1103{ 1114{
1104 struct rb_node **p = &entries->rb_node; 1115 struct rb_node **p = &entries->rb_node;
1105 struct rb_node *parent = NULL; 1116 struct rb_node *parent = NULL;
1106 struct hist_entry *iter; 1117 struct hist_entry *iter;
1107 1118
1108 if (symbol_conf.use_callchain) 1119 if (use_callchain)
1109 callchain_param.sort(&he->sorted_chain, he->callchain, 1120 callchain_param.sort(&he->sorted_chain, he->callchain,
1110 min_callchain_hits, &callchain_param); 1121 min_callchain_hits, &callchain_param);
1111 1122
@@ -1129,6 +1140,13 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
1129 struct rb_node *next; 1140 struct rb_node *next;
1130 struct hist_entry *n; 1141 struct hist_entry *n;
1131 u64 min_callchain_hits; 1142 u64 min_callchain_hits;
1143 struct perf_evsel *evsel = hists_to_evsel(hists);
1144 bool use_callchain;
1145
1146 if (evsel && !symbol_conf.show_ref_callgraph)
1147 use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
1148 else
1149 use_callchain = symbol_conf.use_callchain;
1132 1150
1133 min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); 1151 min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
1134 1152
@@ -1147,7 +1165,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
1147 n = rb_entry(next, struct hist_entry, rb_node_in); 1165 n = rb_entry(next, struct hist_entry, rb_node_in);
1148 next = rb_next(&n->rb_node_in); 1166 next = rb_next(&n->rb_node_in);
1149 1167
1150 __hists__insert_output_entry(&hists->entries, n, min_callchain_hits); 1168 __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
1151 hists__inc_stats(hists, n); 1169 hists__inc_stats(hists, n);
1152 1170
1153 if (!n->filtered) 1171 if (!n->filtered)
@@ -1414,6 +1432,39 @@ int hists__link(struct hists *leader, struct hists *other)
1414 return 0; 1432 return 0;
1415} 1433}
1416 1434
1435void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
1436 struct perf_sample *sample, bool nonany_branch_mode)
1437{
1438 struct branch_info *bi;
1439
1440 /* If we have branch cycles always annotate them. */
1441 if (bs && bs->nr && bs->entries[0].flags.cycles) {
1442 int i;
1443
1444 bi = sample__resolve_bstack(sample, al);
1445 if (bi) {
1446 struct addr_map_symbol *prev = NULL;
1447
1448 /*
1449 * Ignore errors, still want to process the
1450 * other entries.
1451 *
 1452 * For non-standard branch modes always
1453 * force no IPC (prev == NULL)
1454 *
1455 * Note that perf stores branches reversed from
1456 * program order!
1457 */
1458 for (i = bs->nr - 1; i >= 0; i--) {
1459 addr_map_symbol__account_cycles(&bi[i].from,
1460 nonany_branch_mode ? NULL : prev,
1461 bi[i].flags.cycles);
1462 prev = &bi[i].to;
1463 }
1464 free(bi);
1465 }
1466 }
1467}
1417 1468
1418size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) 1469size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp)
1419{ 1470{
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 5ed8d9c22981..de6d58e7f0d5 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -30,6 +30,7 @@ enum hist_column {
30 HISTC_PARENT, 30 HISTC_PARENT,
31 HISTC_CPU, 31 HISTC_CPU,
32 HISTC_SRCLINE, 32 HISTC_SRCLINE,
33 HISTC_SRCFILE,
33 HISTC_MISPREDICT, 34 HISTC_MISPREDICT,
34 HISTC_IN_TX, 35 HISTC_IN_TX,
35 HISTC_ABORT, 36 HISTC_ABORT,
@@ -47,6 +48,7 @@ enum hist_column {
47 HISTC_MEM_SNOOP, 48 HISTC_MEM_SNOOP,
48 HISTC_MEM_DCACHELINE, 49 HISTC_MEM_DCACHELINE,
49 HISTC_TRANSACTION, 50 HISTC_TRANSACTION,
51 HISTC_CYCLES,
50 HISTC_NR_COLS, /* Last entry */ 52 HISTC_NR_COLS, /* Last entry */
51}; 53};
52 54
@@ -311,7 +313,7 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
311int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, 313int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
312 struct hist_browser_timer *hbt, 314 struct hist_browser_timer *hbt,
313 float min_pcnt, 315 float min_pcnt,
314 struct perf_session_env *env); 316 struct perf_env *env);
315int script_browse(const char *script_opt); 317int script_browse(const char *script_opt);
316#else 318#else
317static inline 319static inline
@@ -319,7 +321,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
319 const char *help __maybe_unused, 321 const char *help __maybe_unused,
320 struct hist_browser_timer *hbt __maybe_unused, 322 struct hist_browser_timer *hbt __maybe_unused,
321 float min_pcnt __maybe_unused, 323 float min_pcnt __maybe_unused,
322 struct perf_session_env *env __maybe_unused) 324 struct perf_env *env __maybe_unused)
323{ 325{
324 return 0; 326 return 0;
325} 327}
@@ -349,6 +351,9 @@ static inline int script_browse(const char *script_opt __maybe_unused)
349 351
350unsigned int hists__sort_list_width(struct hists *hists); 352unsigned int hists__sort_list_width(struct hists *hists);
351 353
354void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
355 struct perf_sample *sample, bool nonany_branch_mode);
356
352struct option; 357struct option;
353int parse_filter_percentage(const struct option *opt __maybe_unused, 358int parse_filter_percentage(const struct option *opt __maybe_unused,
354 const char *arg, int unset __maybe_unused); 359 const char *arg, int unset __maybe_unused);
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
new file mode 100644
index 000000000000..ea768625ab5b
--- /dev/null
+++ b/tools/perf/util/intel-bts.c
@@ -0,0 +1,933 @@
1/*
 2 * intel-bts.c: Intel Branch Trace Store support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <endian.h>
17#include <byteswap.h>
18#include <linux/kernel.h>
19#include <linux/types.h>
20#include <linux/bitops.h>
21#include <linux/log2.h>
22
23#include "cpumap.h"
24#include "color.h"
25#include "evsel.h"
26#include "evlist.h"
27#include "machine.h"
28#include "session.h"
29#include "util.h"
30#include "thread.h"
31#include "thread-stack.h"
32#include "debug.h"
33#include "tsc.h"
34#include "auxtrace.h"
35#include "intel-pt-decoder/intel-pt-insn-decoder.h"
36#include "intel-bts.h"
37
38#define MAX_TIMESTAMP (~0ULL)
39
40#define INTEL_BTS_ERR_NOINSN 5
41#define INTEL_BTS_ERR_LOST 9
42
43#if __BYTE_ORDER == __BIG_ENDIAN
44#define le64_to_cpu bswap_64
45#else
46#define le64_to_cpu
47#endif
48
49struct intel_bts {
50 struct auxtrace auxtrace;
51 struct auxtrace_queues queues;
52 struct auxtrace_heap heap;
53 u32 auxtrace_type;
54 struct perf_session *session;
55 struct machine *machine;
56 bool sampling_mode;
57 bool snapshot_mode;
58 bool data_queued;
59 u32 pmu_type;
60 struct perf_tsc_conversion tc;
61 bool cap_user_time_zero;
62 struct itrace_synth_opts synth_opts;
63 bool sample_branches;
64 u32 branches_filter;
65 u64 branches_sample_type;
66 u64 branches_id;
67 size_t branches_event_size;
68 bool synth_needs_swap;
69};
70
71struct intel_bts_queue {
72 struct intel_bts *bts;
73 unsigned int queue_nr;
74 struct auxtrace_buffer *buffer;
75 bool on_heap;
76 bool done;
77 pid_t pid;
78 pid_t tid;
79 int cpu;
80 u64 time;
81 struct intel_pt_insn intel_pt_insn;
82 u32 sample_flags;
83};
84
85struct branch {
86 u64 from;
87 u64 to;
88 u64 misc;
89};
90
91static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
92 unsigned char *buf, size_t len)
93{
94 struct branch *branch;
95 size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
96 const char *color = PERF_COLOR_BLUE;
97
98 color_fprintf(stdout, color,
99 ". ... Intel BTS data: size %zu bytes\n",
100 len);
101
102 while (len) {
103 if (len >= br_sz)
104 sz = br_sz;
105 else
106 sz = len;
107 printf(".");
108 color_fprintf(stdout, color, " %08x: ", pos);
109 for (i = 0; i < sz; i++)
110 color_fprintf(stdout, color, " %02x", buf[i]);
111 for (; i < br_sz; i++)
112 color_fprintf(stdout, color, " ");
113 if (len >= br_sz) {
114 branch = (struct branch *)buf;
115 color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
116 le64_to_cpu(branch->from),
117 le64_to_cpu(branch->to),
118 le64_to_cpu(branch->misc) & 0x10 ?
119 "pred" : "miss");
120 } else {
121 color_fprintf(stdout, color, " Bad record!\n");
122 }
123 pos += sz;
124 buf += sz;
125 len -= sz;
126 }
127}
128
129static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
130 size_t len)
131{
132 printf(".\n");
133 intel_bts_dump(bts, buf, len);
134}
135
136static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
137{
138 union perf_event event;
139 int err;
140
141 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
142 INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
143 sample->tid, 0, "Lost trace data");
144
145 err = perf_session__deliver_synth_event(bts->session, &event, NULL);
146 if (err)
147 pr_err("Intel BTS: failed to deliver error event, error %d\n",
148 err);
149
150 return err;
151}
152
153static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
154 unsigned int queue_nr)
155{
156 struct intel_bts_queue *btsq;
157
158 btsq = zalloc(sizeof(struct intel_bts_queue));
159 if (!btsq)
160 return NULL;
161
162 btsq->bts = bts;
163 btsq->queue_nr = queue_nr;
164 btsq->pid = -1;
165 btsq->tid = -1;
166 btsq->cpu = -1;
167
168 return btsq;
169}
170
171static int intel_bts_setup_queue(struct intel_bts *bts,
172 struct auxtrace_queue *queue,
173 unsigned int queue_nr)
174{
175 struct intel_bts_queue *btsq = queue->priv;
176
177 if (list_empty(&queue->head))
178 return 0;
179
180 if (!btsq) {
181 btsq = intel_bts_alloc_queue(bts, queue_nr);
182 if (!btsq)
183 return -ENOMEM;
184 queue->priv = btsq;
185
186 if (queue->cpu != -1)
187 btsq->cpu = queue->cpu;
188 btsq->tid = queue->tid;
189 }
190
191 if (bts->sampling_mode)
192 return 0;
193
194 if (!btsq->on_heap && !btsq->buffer) {
195 int ret;
196
197 btsq->buffer = auxtrace_buffer__next(queue, NULL);
198 if (!btsq->buffer)
199 return 0;
200
201 ret = auxtrace_heap__add(&bts->heap, queue_nr,
202 btsq->buffer->reference);
203 if (ret)
204 return ret;
205 btsq->on_heap = true;
206 }
207
208 return 0;
209}
210
211static int intel_bts_setup_queues(struct intel_bts *bts)
212{
213 unsigned int i;
214 int ret;
215
216 for (i = 0; i < bts->queues.nr_queues; i++) {
217 ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
218 i);
219 if (ret)
220 return ret;
221 }
222 return 0;
223}
224
225static inline int intel_bts_update_queues(struct intel_bts *bts)
226{
227 if (bts->queues.new_data) {
228 bts->queues.new_data = false;
229 return intel_bts_setup_queues(bts);
230 }
231 return 0;
232}
233
234static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
235 unsigned char *buf_b, size_t len_b)
236{
237 size_t offs, len;
238
239 if (len_a > len_b)
240 offs = len_a - len_b;
241 else
242 offs = 0;
243
244 for (; offs < len_a; offs += sizeof(struct branch)) {
245 len = len_a - offs;
246 if (!memcmp(buf_a + offs, buf_b, len))
247 return buf_b + len;
248 }
249
250 return buf_b;
251}
252
253static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
254 struct auxtrace_buffer *b)
255{
256 struct auxtrace_buffer *a;
257 void *start;
258
259 if (b->list.prev == &queue->head)
260 return 0;
261 a = list_entry(b->list.prev, struct auxtrace_buffer, list);
262 start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
263 if (!start)
264 return -EINVAL;
265 b->use_size = b->data + b->size - start;
266 b->use_data = start;
267 return 0;
268}
269
270static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
271 struct branch *branch)
272{
273 int ret;
274 struct intel_bts *bts = btsq->bts;
275 union perf_event event;
276 struct perf_sample sample = { .ip = 0, };
277
278 event.sample.header.type = PERF_RECORD_SAMPLE;
279 event.sample.header.misc = PERF_RECORD_MISC_USER;
280 event.sample.header.size = sizeof(struct perf_event_header);
281
282 sample.ip = le64_to_cpu(branch->from);
283 sample.pid = btsq->pid;
284 sample.tid = btsq->tid;
285 sample.addr = le64_to_cpu(branch->to);
286 sample.id = btsq->bts->branches_id;
287 sample.stream_id = btsq->bts->branches_id;
288 sample.period = 1;
289 sample.cpu = btsq->cpu;
290 sample.flags = btsq->sample_flags;
291 sample.insn_len = btsq->intel_pt_insn.length;
292
293 if (bts->synth_opts.inject) {
294 event.sample.header.size = bts->branches_event_size;
295 ret = perf_event__synthesize_sample(&event,
296 bts->branches_sample_type,
297 0, &sample,
298 bts->synth_needs_swap);
299 if (ret)
300 return ret;
301 }
302
303 ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
304 if (ret)
305 pr_err("Intel BTS: failed to deliver branch event, error %d\n",
306 ret);
307
308 return ret;
309}
310
311static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
312{
313 struct machine *machine = btsq->bts->machine;
314 struct thread *thread;
315 struct addr_location al;
316 unsigned char buf[1024];
317 size_t bufsz;
318 ssize_t len;
319 int x86_64;
320 uint8_t cpumode;
321 int err = -1;
322
323 bufsz = intel_pt_insn_max_size();
324
325 if (machine__kernel_ip(machine, ip))
326 cpumode = PERF_RECORD_MISC_KERNEL;
327 else
328 cpumode = PERF_RECORD_MISC_USER;
329
330 thread = machine__find_thread(machine, -1, btsq->tid);
331 if (!thread)
332 return -1;
333
334 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
335 if (!al.map || !al.map->dso)
336 goto out_put;
337
338 len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz);
339 if (len <= 0)
340 goto out_put;
341
342 /* Load maps to ensure dso->is_64_bit has been updated */
343 map__load(al.map, machine->symbol_filter);
344
345 x86_64 = al.map->dso->is_64_bit;
346
347 if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
348 goto out_put;
349
350 err = 0;
351out_put:
352 thread__put(thread);
353 return err;
354}
355
356static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
357 pid_t tid, u64 ip)
358{
359 union perf_event event;
360 int err;
361
362 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
363 INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
364 "Failed to get instruction");
365
366 err = perf_session__deliver_synth_event(bts->session, &event, NULL);
367 if (err)
368 pr_err("Intel BTS: failed to deliver error event, error %d\n",
369 err);
370
371 return err;
372}
373
374static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
375 struct branch *branch)
376{
377 int err;
378
379 if (!branch->from) {
380 if (branch->to)
381 btsq->sample_flags = PERF_IP_FLAG_BRANCH |
382 PERF_IP_FLAG_TRACE_BEGIN;
383 else
384 btsq->sample_flags = 0;
385 btsq->intel_pt_insn.length = 0;
386 } else if (!branch->to) {
387 btsq->sample_flags = PERF_IP_FLAG_BRANCH |
388 PERF_IP_FLAG_TRACE_END;
389 btsq->intel_pt_insn.length = 0;
390 } else {
391 err = intel_bts_get_next_insn(btsq, branch->from);
392 if (err) {
393 btsq->sample_flags = 0;
394 btsq->intel_pt_insn.length = 0;
395 if (!btsq->bts->synth_opts.errors)
396 return 0;
397 err = intel_bts_synth_error(btsq->bts, btsq->cpu,
398 btsq->pid, btsq->tid,
399 branch->from);
400 return err;
401 }
402 btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
403 /* Check for an async branch into the kernel */
404 if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
405 machine__kernel_ip(btsq->bts->machine, branch->to) &&
406 btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
407 PERF_IP_FLAG_CALL |
408 PERF_IP_FLAG_SYSCALLRET))
409 btsq->sample_flags = PERF_IP_FLAG_BRANCH |
410 PERF_IP_FLAG_CALL |
411 PERF_IP_FLAG_ASYNC |
412 PERF_IP_FLAG_INTERRUPT;
413 }
414
415 return 0;
416}
417
418static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
419 struct auxtrace_buffer *buffer)
420{
421 struct branch *branch;
422 size_t sz, bsz = sizeof(struct branch);
423 u32 filter = btsq->bts->branches_filter;
424 int err = 0;
425
426 if (buffer->use_data) {
427 sz = buffer->use_size;
428 branch = buffer->use_data;
429 } else {
430 sz = buffer->size;
431 branch = buffer->data;
432 }
433
434 if (!btsq->bts->sample_branches)
435 return 0;
436
437 for (; sz > bsz; branch += 1, sz -= bsz) {
438 if (!branch->from && !branch->to)
439 continue;
440 intel_bts_get_branch_type(btsq, branch);
441 if (filter && !(filter & btsq->sample_flags))
442 continue;
443 err = intel_bts_synth_branch_sample(btsq, branch);
444 if (err)
445 break;
446 }
447 return err;
448}
449
450static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
451{
452 struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
453 struct auxtrace_queue *queue;
454 struct thread *thread;
455 int err;
456
457 if (btsq->done)
458 return 1;
459
460 if (btsq->pid == -1) {
461 thread = machine__find_thread(btsq->bts->machine, -1,
462 btsq->tid);
463 if (thread)
464 btsq->pid = thread->pid_;
465 } else {
466 thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
467 btsq->tid);
468 }
469
470 queue = &btsq->bts->queues.queue_array[btsq->queue_nr];
471
472 if (!buffer)
473 buffer = auxtrace_buffer__next(queue, NULL);
474
475 if (!buffer) {
476 if (!btsq->bts->sampling_mode)
477 btsq->done = 1;
478 err = 1;
479 goto out_put;
480 }
481
482 /* Currently there is no support for split buffers */
483 if (buffer->consecutive) {
484 err = -EINVAL;
485 goto out_put;
486 }
487
488 if (!buffer->data) {
489 int fd = perf_data_file__fd(btsq->bts->session->file);
490
491 buffer->data = auxtrace_buffer__get_data(buffer, fd);
492 if (!buffer->data) {
493 err = -ENOMEM;
494 goto out_put;
495 }
496 }
497
498 if (btsq->bts->snapshot_mode && !buffer->consecutive &&
499 intel_bts_do_fix_overlap(queue, buffer)) {
500 err = -ENOMEM;
501 goto out_put;
502 }
503
504 if (!btsq->bts->synth_opts.callchain && thread &&
505 (!old_buffer || btsq->bts->sampling_mode ||
506 (btsq->bts->snapshot_mode && !buffer->consecutive)))
507 thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
508
509 err = intel_bts_process_buffer(btsq, buffer);
510
511 auxtrace_buffer__drop_data(buffer);
512
513 btsq->buffer = auxtrace_buffer__next(queue, buffer);
514 if (btsq->buffer) {
515 if (timestamp)
516 *timestamp = btsq->buffer->reference;
517 } else {
518 if (!btsq->bts->sampling_mode)
519 btsq->done = 1;
520 }
521out_put:
522 thread__put(thread);
523 return err;
524}
525
526static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
527{
528 u64 ts = 0;
529 int ret;
530
531 while (1) {
532 ret = intel_bts_process_queue(btsq, &ts);
533 if (ret < 0)
534 return ret;
535 if (ret)
536 break;
537 }
538 return 0;
539}
540
541static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
542{
543 struct auxtrace_queues *queues = &bts->queues;
544 unsigned int i;
545
546 for (i = 0; i < queues->nr_queues; i++) {
547 struct auxtrace_queue *queue = &bts->queues.queue_array[i];
548 struct intel_bts_queue *btsq = queue->priv;
549
550 if (btsq && btsq->tid == tid)
551 return intel_bts_flush_queue(btsq);
552 }
553 return 0;
554}
555
556static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
557{
558 while (1) {
559 unsigned int queue_nr;
560 struct auxtrace_queue *queue;
561 struct intel_bts_queue *btsq;
562 u64 ts = 0;
563 int ret;
564
565 if (!bts->heap.heap_cnt)
566 return 0;
567
568 if (bts->heap.heap_array[0].ordinal > timestamp)
569 return 0;
570
571 queue_nr = bts->heap.heap_array[0].queue_nr;
572 queue = &bts->queues.queue_array[queue_nr];
573 btsq = queue->priv;
574
575 auxtrace_heap__pop(&bts->heap);
576
577 ret = intel_bts_process_queue(btsq, &ts);
578 if (ret < 0) {
579 auxtrace_heap__add(&bts->heap, queue_nr, ts);
580 return ret;
581 }
582
583 if (!ret) {
584 ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
585 if (ret < 0)
586 return ret;
587 } else {
588 btsq->on_heap = false;
589 }
590 }
591
592 return 0;
593}
594
595static int intel_bts_process_event(struct perf_session *session,
596 union perf_event *event,
597 struct perf_sample *sample,
598 struct perf_tool *tool)
599{
600 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
601 auxtrace);
602 u64 timestamp;
603 int err;
604
605 if (dump_trace)
606 return 0;
607
608 if (!tool->ordered_events) {
609 pr_err("Intel BTS requires ordered events\n");
610 return -EINVAL;
611 }
612
613 if (sample->time && sample->time != (u64)-1)
614 timestamp = perf_time_to_tsc(sample->time, &bts->tc);
615 else
616 timestamp = 0;
617
618 err = intel_bts_update_queues(bts);
619 if (err)
620 return err;
621
622 err = intel_bts_process_queues(bts, timestamp);
623 if (err)
624 return err;
625 if (event->header.type == PERF_RECORD_EXIT) {
 626 err = intel_bts_process_tid_exit(bts, event->fork.tid);
627 if (err)
628 return err;
629 }
630
631 if (event->header.type == PERF_RECORD_AUX &&
632 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
633 bts->synth_opts.errors)
634 err = intel_bts_lost(bts, sample);
635
636 return err;
637}
638
639static int intel_bts_process_auxtrace_event(struct perf_session *session,
640 union perf_event *event,
641 struct perf_tool *tool __maybe_unused)
642{
643 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
644 auxtrace);
645
646 if (bts->sampling_mode)
647 return 0;
648
649 if (!bts->data_queued) {
650 struct auxtrace_buffer *buffer;
651 off_t data_offset;
652 int fd = perf_data_file__fd(session->file);
653 int err;
654
655 if (perf_data_file__is_pipe(session->file)) {
656 data_offset = 0;
657 } else {
658 data_offset = lseek(fd, 0, SEEK_CUR);
659 if (data_offset == -1)
660 return -errno;
661 }
662
663 err = auxtrace_queues__add_event(&bts->queues, session, event,
664 data_offset, &buffer);
665 if (err)
666 return err;
667
 668 /* Dump here now that we have copied a piped trace out of the pipe */
669 if (dump_trace) {
670 if (auxtrace_buffer__get_data(buffer, fd)) {
671 intel_bts_dump_event(bts, buffer->data,
672 buffer->size);
673 auxtrace_buffer__put_data(buffer);
674 }
675 }
676 }
677
678 return 0;
679}
680
681static int intel_bts_flush(struct perf_session *session,
682 struct perf_tool *tool __maybe_unused)
683{
684 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
685 auxtrace);
686 int ret;
687
688 if (dump_trace || bts->sampling_mode)
689 return 0;
690
691 if (!tool->ordered_events)
692 return -EINVAL;
693
694 ret = intel_bts_update_queues(bts);
695 if (ret < 0)
696 return ret;
697
698 return intel_bts_process_queues(bts, MAX_TIMESTAMP);
699}
700
701static void intel_bts_free_queue(void *priv)
702{
703 struct intel_bts_queue *btsq = priv;
704
705 if (!btsq)
706 return;
707 free(btsq);
708}
709
710static void intel_bts_free_events(struct perf_session *session)
711{
712 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
713 auxtrace);
714 struct auxtrace_queues *queues = &bts->queues;
715 unsigned int i;
716
717 for (i = 0; i < queues->nr_queues; i++) {
718 intel_bts_free_queue(queues->queue_array[i].priv);
719 queues->queue_array[i].priv = NULL;
720 }
721 auxtrace_queues__free(queues);
722}
723
724static void intel_bts_free(struct perf_session *session)
725{
726 struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
727 auxtrace);
728
729 auxtrace_heap__free(&bts->heap);
730 intel_bts_free_events(session);
731 session->auxtrace = NULL;
732 free(bts);
733}
734
735struct intel_bts_synth {
736 struct perf_tool dummy_tool;
737 struct perf_session *session;
738};
739
740static int intel_bts_event_synth(struct perf_tool *tool,
741 union perf_event *event,
742 struct perf_sample *sample __maybe_unused,
743 struct machine *machine __maybe_unused)
744{
745 struct intel_bts_synth *intel_bts_synth =
746 container_of(tool, struct intel_bts_synth, dummy_tool);
747
748 return perf_session__deliver_synth_event(intel_bts_synth->session,
749 event, NULL);
750}
751
752static int intel_bts_synth_event(struct perf_session *session,
753 struct perf_event_attr *attr, u64 id)
754{
755 struct intel_bts_synth intel_bts_synth;
756
757 memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
758 intel_bts_synth.session = session;
759
760 return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
761 &id, intel_bts_event_synth);
762}
763
764static int intel_bts_synth_events(struct intel_bts *bts,
765 struct perf_session *session)
766{
767 struct perf_evlist *evlist = session->evlist;
768 struct perf_evsel *evsel;
769 struct perf_event_attr attr;
770 bool found = false;
771 u64 id;
772 int err;
773
774 evlist__for_each(evlist, evsel) {
775 if (evsel->attr.type == bts->pmu_type && evsel->ids) {
776 found = true;
777 break;
778 }
779 }
780
781 if (!found) {
782 pr_debug("There are no selected events with Intel BTS data\n");
783 return 0;
784 }
785
786 memset(&attr, 0, sizeof(struct perf_event_attr));
787 attr.size = sizeof(struct perf_event_attr);
788 attr.type = PERF_TYPE_HARDWARE;
789 attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
790 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
791 PERF_SAMPLE_PERIOD;
792 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
793 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
794 attr.exclude_user = evsel->attr.exclude_user;
795 attr.exclude_kernel = evsel->attr.exclude_kernel;
796 attr.exclude_hv = evsel->attr.exclude_hv;
797 attr.exclude_host = evsel->attr.exclude_host;
798 attr.exclude_guest = evsel->attr.exclude_guest;
799 attr.sample_id_all = evsel->attr.sample_id_all;
800 attr.read_format = evsel->attr.read_format;
801
802 id = evsel->id[0] + 1000000000;
803 if (!id)
804 id = 1;
805
806 if (bts->synth_opts.branches) {
807 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
808 attr.sample_period = 1;
809 attr.sample_type |= PERF_SAMPLE_ADDR;
810 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
811 id, (u64)attr.sample_type);
812 err = intel_bts_synth_event(session, &attr, id);
813 if (err) {
814 pr_err("%s: failed to synthesize 'branches' event type\n",
815 __func__);
816 return err;
817 }
818 bts->sample_branches = true;
819 bts->branches_sample_type = attr.sample_type;
820 bts->branches_id = id;
821 /*
822 * We only use sample types from PERF_SAMPLE_MASK so we can use
823 * __perf_evsel__sample_size() here.
824 */
825 bts->branches_event_size = sizeof(struct sample_event) +
826 __perf_evsel__sample_size(attr.sample_type);
827 }
828
829 bts->synth_needs_swap = evsel->needs_swap;
830
831 return 0;
832}
833
834static const char * const intel_bts_info_fmts[] = {
835 [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n",
836 [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
 837 [INTEL_BTS_TIME_MULT] = " Time Multiplier %"PRIu64"\n",
838 [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n",
839 [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
840 [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
841};
842
843static void intel_bts_print_info(u64 *arr, int start, int finish)
844{
845 int i;
846
847 if (!dump_trace)
848 return;
849
850 for (i = start; i <= finish; i++)
851 fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
852}
853
854u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];
855
856int intel_bts_process_auxtrace_info(union perf_event *event,
857 struct perf_session *session)
858{
859 struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
860 size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
861 struct intel_bts *bts;
862 int err;
863
864 if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
865 min_sz)
866 return -EINVAL;
867
868 bts = zalloc(sizeof(struct intel_bts));
869 if (!bts)
870 return -ENOMEM;
871
872 err = auxtrace_queues__init(&bts->queues);
873 if (err)
874 goto err_free;
875
876 bts->session = session;
877 bts->machine = &session->machines.host; /* No kvm support */
878 bts->auxtrace_type = auxtrace_info->type;
879 bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
880 bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
881 bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
882 bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
883 bts->cap_user_time_zero =
884 auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
885 bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];
886
887 bts->sampling_mode = false;
888
889 bts->auxtrace.process_event = intel_bts_process_event;
890 bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
891 bts->auxtrace.flush_events = intel_bts_flush;
892 bts->auxtrace.free_events = intel_bts_free_events;
893 bts->auxtrace.free = intel_bts_free;
894 session->auxtrace = &bts->auxtrace;
895
896 intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
897 INTEL_BTS_SNAPSHOT_MODE);
898
899 if (dump_trace)
900 return 0;
901
902 if (session->itrace_synth_opts && session->itrace_synth_opts->set)
903 bts->synth_opts = *session->itrace_synth_opts;
904 else
905 itrace_synth_opts__set_default(&bts->synth_opts);
906
907 if (bts->synth_opts.calls)
908 bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
909 PERF_IP_FLAG_TRACE_END;
910 if (bts->synth_opts.returns)
911 bts->branches_filter |= PERF_IP_FLAG_RETURN |
912 PERF_IP_FLAG_TRACE_BEGIN;
913
914 err = intel_bts_synth_events(bts, session);
915 if (err)
916 goto err_free_queues;
917
918 err = auxtrace_queues__process_index(&bts->queues, session);
919 if (err)
920 goto err_free_queues;
921
922 if (bts->queues.populated)
923 bts->data_queued = true;
924
925 return 0;
926
927err_free_queues:
928 auxtrace_queues__free(&bts->queues);
929 session->auxtrace = NULL;
930err_free:
931 free(bts);
932 return err;
933}
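
Each BTS record is three little-endian u64 values, and bit 4 of the
misc word distinguishes predicted from mispredicted branches, which is
what intel_bts_dump() prints as "pred"/"miss". A minimal decode of one
24-byte record, sketched after the dump code (on a little-endian build
the le64_to_cpu defined above is a no-op):

    struct branch *br = (struct branch *)buf;
    u64 from = le64_to_cpu(br->from);        /* branch source */
    u64 to = le64_to_cpu(br->to);            /* branch target */
    int predicted = le64_to_cpu(br->misc) & 0x10;
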
diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h
new file mode 100644
index 000000000000..ca65e21b3e83
--- /dev/null
+++ b/tools/perf/util/intel-bts.h
@@ -0,0 +1,43 @@
1/*
 2 * intel-bts.h: Intel Branch Trace Store support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef INCLUDE__PERF_INTEL_BTS_H__
17#define INCLUDE__PERF_INTEL_BTS_H__
18
19#define INTEL_BTS_PMU_NAME "intel_bts"
20
21enum {
22 INTEL_BTS_PMU_TYPE,
23 INTEL_BTS_TIME_SHIFT,
24 INTEL_BTS_TIME_MULT,
25 INTEL_BTS_TIME_ZERO,
26 INTEL_BTS_CAP_USER_TIME_ZERO,
27 INTEL_BTS_SNAPSHOT_MODE,
28 INTEL_BTS_AUXTRACE_PRIV_MAX,
29};
30
31#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64))
32
33struct auxtrace_record;
34struct perf_tool;
35union perf_event;
36struct perf_session;
37
38struct auxtrace_record *intel_bts_recording_init(int *err);
39
40int intel_bts_process_auxtrace_info(union perf_event *event,
41 struct perf_session *session);
42
43#endif
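The priv[] indices above define the wire layout of the auxtrace info event,
so the recording side must fill the array in the same enum order. A minimal
sketch of that fill (the names btsr, tc and cap_user_time_zero are
illustrative assumptions, not the exact recording code):

    auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS;
    auxtrace_info->priv[INTEL_BTS_PMU_TYPE]           = btsr->intel_bts_pmu->type;
    auxtrace_info->priv[INTEL_BTS_TIME_SHIFT]         = tc.time_shift;
    auxtrace_info->priv[INTEL_BTS_TIME_MULT]          = tc.time_mult;
    auxtrace_info->priv[INTEL_BTS_TIME_ZERO]          = tc.time_zero;
    auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero;
    auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE]      = btsr->snapshot_mode;
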
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
new file mode 100644
index 000000000000..240730d682c1
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -0,0 +1,11 @@
1libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
2
3inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
4inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
5
6$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
7 @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
8
9$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
10
11CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -Wno-override-init
diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
new file mode 100644
index 000000000000..517567347aac
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
@@ -0,0 +1,386 @@
1#!/bin/awk -f
2# gen-insn-attr-x86.awk: Instruction attribute table generator
3# Written by Masami Hiramatsu <mhiramat@redhat.com>
4#
5# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
6
7# Awk implementation sanity check
8function check_awk_implement() {
9 if (sprintf("%x", 0) != "0")
10 return "Your awk has a printf-format problem."
11 return ""
12}
13
14# Clear working vars
15function clear_vars() {
16 delete table
17 delete lptable2
18 delete lptable1
19 delete lptable3
20 eid = -1 # escape id
21 gid = -1 # group id
22 aid = -1 # AVX id
23 tname = ""
24}
25
26BEGIN {
27 # Implementation error checking
28 awkchecked = check_awk_implement()
29 if (awkchecked != "") {
30 print "Error: " awkchecked > "/dev/stderr"
31 print "Please try to use gawk." > "/dev/stderr"
32 exit 1
33 }
34
35 # Setup generating tables
36 print "/* x86 opcode map generated from x86-opcode-map.txt */"
37 print "/* Do not change this code. */\n"
38 ggid = 1
39 geid = 1
40 gaid = 0
41 delete etable
42 delete gtable
43 delete atable
44
45 opnd_expr = "^[A-Za-z/]"
46 ext_expr = "^\\("
47 sep_expr = "^\\|$"
48 group_expr = "^Grp[0-9A-Za-z]+"
49
50 imm_expr = "^[IJAOL][a-z]"
51 imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
52 imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
53 imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
54 imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
55 imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
56 imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
57 imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
58 imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
59 imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
60 imm_flag["Ob"] = "INAT_MOFFSET"
61 imm_flag["Ov"] = "INAT_MOFFSET"
62 imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
63
64 modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
65 force64_expr = "\\([df]64\\)"
66 rex_expr = "^REX(\\.[XRWB]+)*"
67 fpu_expr = "^ESC" # TODO
68
69 lprefix1_expr = "\\((66|!F3)\\)"
70 lprefix2_expr = "\\(F3\\)"
71 lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
72 lprefix_expr = "\\((66|F2|F3)\\)"
73 max_lprefix = 4
74
75 # All opcodes starting with lower-case 'v' or with a (v1) superscript
76 # accept the VEX prefix
77 vexok_opcode_expr = "^v.*"
78 vexok_expr = "\\(v1\\)"
79 # All opcodes with a (v) superscript support *only* the VEX prefix
80 vexonly_expr = "\\(v\\)"
81
82 prefix_expr = "\\(Prefix\\)"
83 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
84 prefix_num["REPNE"] = "INAT_PFX_REPNE"
85 prefix_num["REP/REPE"] = "INAT_PFX_REPE"
86 prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
87 prefix_num["XRELEASE"] = "INAT_PFX_REPE"
88 prefix_num["LOCK"] = "INAT_PFX_LOCK"
89 prefix_num["SEG=CS"] = "INAT_PFX_CS"
90 prefix_num["SEG=DS"] = "INAT_PFX_DS"
91 prefix_num["SEG=ES"] = "INAT_PFX_ES"
92 prefix_num["SEG=FS"] = "INAT_PFX_FS"
93 prefix_num["SEG=GS"] = "INAT_PFX_GS"
94 prefix_num["SEG=SS"] = "INAT_PFX_SS"
95 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
96 prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
97 prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
98
99 clear_vars()
100}
101
102function semantic_error(msg) {
103 print "Semantic error at " NR ": " msg > "/dev/stderr"
104 exit 1
105}
106
107function debug(msg) {
108 print "DEBUG: " msg
109}
110
111function array_size(arr, i,c) {
112 c = 0
113 for (i in arr)
114 c++
115 return c
116}
117
118/^Table:/ {
119 print "/* " $0 " */"
120 if (tname != "")
121 semantic_error("Hit Table: before EndTable:.");
122}
123
124/^Referrer:/ {
125 if (NF != 1) {
126 # escape opcode table
127 ref = ""
128 for (i = 2; i <= NF; i++)
129 ref = ref $i
130 eid = escape[ref]
131 tname = sprintf("inat_escape_table_%d", eid)
132 }
133}
134
135/^AVXcode:/ {
136 if (NF != 1) {
137 # AVX/escape opcode table
138 aid = $2
139 if (gaid <= aid)
140 gaid = aid + 1
141 if (tname == "") # AVX only opcode table
142 tname = sprintf("inat_avx_table_%d", $2)
143 }
144 if (aid == -1 && eid == -1) # primary opcode table
145 tname = "inat_primary_table"
146}
147
148/^GrpTable:/ {
149 print "/* " $0 " */"
150 if (!($2 in group))
151 semantic_error("No group: " $2 )
152 gid = group[$2]
153 tname = "inat_group_table_" gid
154}
155
156function print_table(tbl,name,fmt,n)
157{
158 print "const insn_attr_t " name " = {"
159 for (i = 0; i < n; i++) {
160 id = sprintf(fmt, i)
161 if (tbl[id])
162 print " [" id "] = " tbl[id] ","
163 }
164 print "};"
165}
166
167/^EndTable/ {
168 if (gid != -1) {
169 # print group tables
170 if (array_size(table) != 0) {
171 print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
172 "0x%x", 8)
173 gtable[gid,0] = tname
174 }
175 if (array_size(lptable1) != 0) {
176 print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
177 "0x%x", 8)
178 gtable[gid,1] = tname "_1"
179 }
180 if (array_size(lptable2) != 0) {
181 print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
182 "0x%x", 8)
183 gtable[gid,2] = tname "_2"
184 }
185 if (array_size(lptable3) != 0) {
186 print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
187 "0x%x", 8)
188 gtable[gid,3] = tname "_3"
189 }
190 } else {
191 # print primary/escaped tables
192 if (array_size(table) != 0) {
193 print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
194 "0x%02x", 256)
195 etable[eid,0] = tname
196 if (aid >= 0)
197 atable[aid,0] = tname
198 }
199 if (array_size(lptable1) != 0) {
200 print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
201 "0x%02x", 256)
202 etable[eid,1] = tname "_1"
203 if (aid >= 0)
204 atable[aid,1] = tname "_1"
205 }
206 if (array_size(lptable2) != 0) {
207 print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
208 "0x%02x", 256)
209 etable[eid,2] = tname "_2"
210 if (aid >= 0)
211 atable[aid,2] = tname "_2"
212 }
213 if (array_size(lptable3) != 0) {
214 print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
215 "0x%02x", 256)
216 etable[eid,3] = tname "_3"
217 if (aid >= 0)
218 atable[aid,3] = tname "_3"
219 }
220 }
221 print ""
222 clear_vars()
223}
224
225function add_flags(old,new) {
226 if (old && new)
227 return old " | " new
228 else if (old)
229 return old
230 else
231 return new
232}
233
234# convert operands to flags.
235function convert_operands(count,opnd, i,j,imm,mod)
236{
237 imm = null
238 mod = null
239 for (j = 1; j <= count; j++) {
240 i = opnd[j]
241 if (match(i, imm_expr) == 1) {
242 if (!imm_flag[i])
243 semantic_error("Unknown imm opnd: " i)
244 if (imm) {
245 if (i != "Ib")
246 semantic_error("Second IMM error")
247 imm = add_flags(imm, "INAT_SCNDIMM")
248 } else
249 imm = imm_flag[i]
250 } else if (match(i, modrm_expr))
251 mod = "INAT_MODRM"
252 }
253 return add_flags(imm, mod)
254}
255
256/^[0-9a-f]+\:/ {
257 if (NR == 1)
258 next
259 # get index
260 idx = "0x" substr($1, 1, index($1,":") - 1)
261 if (idx in table)
262 semantic_error("Redefine " idx " in " tname)
263
264 # check if escaped opcode
265 if ("escape" == $2) {
266 if ($3 != "#")
267 semantic_error("No escaped name")
268 ref = ""
269 for (i = 4; i <= NF; i++)
270 ref = ref $i
271 if (ref in escape)
272 semantic_error("Redefine escape (" ref ")")
273 escape[ref] = geid
274 geid++
275 table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
276 next
277 }
278
279 variant = null
280 # converts
281 i = 2
282 while (i <= NF) {
283 opcode = $(i++)
284 delete opnds
285 ext = null
286 flags = null
287 opnd = null
288 # parse one opcode
289 if (match($i, opnd_expr)) {
290 opnd = $i
291 count = split($(i++), opnds, ",")
292 flags = convert_operands(count, opnds)
293 }
294 if (match($i, ext_expr))
295 ext = $(i++)
296 if (match($i, sep_expr))
297 i++
298 else if (i < NF)
299 semantic_error($i " is not a separator")
300
301 # check if group opcode
302 if (match(opcode, group_expr)) {
303 if (!(opcode in group)) {
304 group[opcode] = ggid
305 ggid++
306 }
307 flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
308 }
309 # check force(or default) 64bit
310 if (match(ext, force64_expr))
311 flags = add_flags(flags, "INAT_FORCE64")
312
313 # check REX prefix
314 if (match(opcode, rex_expr))
315 flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
316
317 # check coprocessor escape : TODO
318 if (match(opcode, fpu_expr))
319 flags = add_flags(flags, "INAT_MODRM")
320
321 # check VEX codes
322 if (match(ext, vexonly_expr))
323 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
324 else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
325 flags = add_flags(flags, "INAT_VEXOK")
326
327 # check prefixes
328 if (match(ext, prefix_expr)) {
329 if (!prefix_num[opcode])
330 semantic_error("Unknown prefix: " opcode)
331 flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
332 }
333 if (length(flags) == 0)
334 continue
335 # check if last prefix
336 if (match(ext, lprefix1_expr)) {
337 lptable1[idx] = add_flags(lptable1[idx],flags)
338 variant = "INAT_VARIANT"
339 }
340 if (match(ext, lprefix2_expr)) {
341 lptable2[idx] = add_flags(lptable2[idx],flags)
342 variant = "INAT_VARIANT"
343 }
344 if (match(ext, lprefix3_expr)) {
345 lptable3[idx] = add_flags(lptable3[idx],flags)
346 variant = "INAT_VARIANT"
347 }
348 if (!match(ext, lprefix_expr)){
349 table[idx] = add_flags(table[idx],flags)
350 }
351 }
352 if (variant)
353 table[idx] = add_flags(table[idx],variant)
354}
355
356END {
357 if (awkchecked != "")
358 exit 1
359 # print escape opcode map's array
360 print "/* Escape opcode map array */"
361 print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
362 "[INAT_LSTPFX_MAX + 1] = {"
363 for (i = 0; i < geid; i++)
364 for (j = 0; j < max_lprefix; j++)
365 if (etable[i,j])
366 print " ["i"]["j"] = "etable[i,j]","
367 print "};\n"
368 # print group opcode map's array
369 print "/* Group opcode map array */"
370 print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
371 "[INAT_LSTPFX_MAX + 1] = {"
372 for (i = 0; i < ggid; i++)
373 for (j = 0; j < max_lprefix; j++)
374 if (gtable[i,j])
375 print " ["i"]["j"] = "gtable[i,j]","
376 print "};\n"
377 # print AVX opcode map's array
378 print "/* AVX opcode map array */"
379 print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
380 "[INAT_LSTPFX_MAX + 1] = {"
381 for (i = 0; i < gaid; i++)
382 for (j = 0; j < max_lprefix; j++)
383 if (atable[i,j])
384 print " ["i"]["j"] = "atable[i,j]","
385 print "};"
386}
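For orientation, the script's output is a set of sparse designated-initializer
tables plus the three pointer arrays printed in the END block. An abbreviated,
hand-written illustration of the shape of inat-tables.c (entries invented for
illustration):

    /* Table: one byte opcode */
    const insn_attr_t inat_primary_table[INAT_OPCODE_TABLE_SIZE] = {
            [0x0f] = INAT_MAKE_ESCAPE(1),
            [0x66] = INAT_MAKE_PREFIX(INAT_PFX_OPNDSZ),
            [0x80] = INAT_MAKE_GROUP(1) | INAT_MAKE_IMM(INAT_IMM_BYTE),
    };

    /* Escape opcode map array */
    const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]
                                                [INAT_LSTPFX_MAX + 1] = {
            [1][0] = inat_escape_table_1,
    };
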
diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c
new file mode 100644
index 000000000000..906d94aa0a24
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat.c
@@ -0,0 +1,96 @@
1/*
2 * x86 instruction attribute tables
3 *
4 * Written by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21#include "insn.h"
22
23/* Attribute tables are generated from opcode map */
24#include "inat-tables.c"
25
26/* Attribute search APIs */
27insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
28{
29 return inat_primary_table[opcode];
30}
31
32int inat_get_last_prefix_id(insn_byte_t last_pfx)
33{
34 insn_attr_t lpfx_attr;
35
36 lpfx_attr = inat_get_opcode_attribute(last_pfx);
37 return inat_last_prefix_id(lpfx_attr);
38}
39
40insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
41 insn_attr_t esc_attr)
42{
43 const insn_attr_t *table;
44 int n;
45
46 n = inat_escape_id(esc_attr);
47
48 table = inat_escape_tables[n][0];
49 if (!table)
50 return 0;
51 if (inat_has_variant(table[opcode]) && lpfx_id) {
52 table = inat_escape_tables[n][lpfx_id];
53 if (!table)
54 return 0;
55 }
56 return table[opcode];
57}
58
59insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
60 insn_attr_t grp_attr)
61{
62 const insn_attr_t *table;
63 int n;
64
65 n = inat_group_id(grp_attr);
66
67 table = inat_group_tables[n][0];
68 if (!table)
69 return inat_group_common_attribute(grp_attr);
70 if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
71 table = inat_group_tables[n][lpfx_id];
72 if (!table)
73 return inat_group_common_attribute(grp_attr);
74 }
75 return table[X86_MODRM_REG(modrm)] |
76 inat_group_common_attribute(grp_attr);
77}
78
79insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
80 insn_byte_t vex_p)
81{
82 const insn_attr_t *table;
83 if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
84 return 0;
85 /* At first, this checks the master table */
86 table = inat_avx_tables[vex_m][0];
87 if (!table)
88 return 0;
89 if (!inat_is_group(table[opcode]) && vex_p) {
90 /* If this is not a group, get attribute directly */
91 table = inat_avx_tables[vex_m][vex_p];
92 if (!table)
93 return 0;
94 }
95 return table[opcode];
96}
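A sketch of how these lookups chain when resolving a two-byte opcode such as
F3 0F B8 (POPCNT); the real driver of this sequence is insn_get_opcode() in
insn.c, so this is illustration only:

    insn_attr_t attr;
    int lpfx_id;

    attr    = inat_get_opcode_attribute(0x0f);      /* escape to table 1 */
    lpfx_id = inat_get_last_prefix_id(0xf3);        /* F3 -> INAT_PFX_REPE */
    attr    = inat_get_escape_attribute(0xb8, lpfx_id, attr);
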
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h
new file mode 100644
index 000000000000..611645e903a8
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat.h
@@ -0,0 +1,221 @@
1#ifndef _ASM_X86_INAT_H
2#define _ASM_X86_INAT_H
3/*
4 * x86 instruction attributes
5 *
6 * Written by Masami Hiramatsu <mhiramat@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 *
22 */
23#include "inat_types.h"
24
25/*
26 * Internal bits. Don't use bitmasks directly, because these bits are
27 * unstable. You should use checking functions.
28 */
29
30#define INAT_OPCODE_TABLE_SIZE 256
31#define INAT_GROUP_TABLE_SIZE 8
32
33/* Legacy last prefixes */
34#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */
35#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */
36#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */
37/* Other Legacy prefixes */
38#define INAT_PFX_LOCK 4 /* 0xF0 */
39#define INAT_PFX_CS 5 /* 0x2E */
40#define INAT_PFX_DS 6 /* 0x3E */
41#define INAT_PFX_ES 7 /* 0x26 */
42#define INAT_PFX_FS 8 /* 0x64 */
43#define INAT_PFX_GS 9 /* 0x65 */
44#define INAT_PFX_SS 10 /* 0x36 */
45#define INAT_PFX_ADDRSZ 11 /* 0x67 */
46/* x86-64 REX prefix */
47#define INAT_PFX_REX 12 /* 0x4X */
48/* AVX VEX prefixes */
49#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
50#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
51
52#define INAT_LSTPFX_MAX 3
53#define INAT_LGCPFX_MAX 11
54
55/* Immediate size */
56#define INAT_IMM_BYTE 1
57#define INAT_IMM_WORD 2
58#define INAT_IMM_DWORD 3
59#define INAT_IMM_QWORD 4
60#define INAT_IMM_PTR 5
61#define INAT_IMM_VWORD32 6
62#define INAT_IMM_VWORD 7
63
64/* Legacy prefix */
65#define INAT_PFX_OFFS 0
66#define INAT_PFX_BITS 4
67#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
68#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
69/* Escape opcodes */
70#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS)
71#define INAT_ESC_BITS 2
72#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1)
73#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS)
74/* Group opcodes (1-16) */
75#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS)
76#define INAT_GRP_BITS 5
77#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1)
78#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS)
79/* Immediates */
80#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS)
81#define INAT_IMM_BITS 3
82#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
83/* Flags */
84#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
85#define INAT_MODRM (1 << (INAT_FLAG_OFFS))
86#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1))
87#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2))
88#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
89#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
90#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
91#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
92/* Attribute making macros for attribute tables */
93#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
94#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
95#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
96#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)
97
98/* Attribute search APIs */
99extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
100extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
101extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
102 int lpfx_id,
103 insn_attr_t esc_attr);
104extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
105 int lpfx_id,
106 insn_attr_t esc_attr);
107extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
108 insn_byte_t vex_m,
109 insn_byte_t vex_pp);
110
111/* Attribute checking functions */
112static inline int inat_is_legacy_prefix(insn_attr_t attr)
113{
114 attr &= INAT_PFX_MASK;
115 return attr && attr <= INAT_LGCPFX_MAX;
116}
117
118static inline int inat_is_address_size_prefix(insn_attr_t attr)
119{
120 return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
121}
122
123static inline int inat_is_operand_size_prefix(insn_attr_t attr)
124{
125 return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
126}
127
128static inline int inat_is_rex_prefix(insn_attr_t attr)
129{
130 return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
131}
132
133static inline int inat_last_prefix_id(insn_attr_t attr)
134{
135 if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
136 return 0;
137 else
138 return attr & INAT_PFX_MASK;
139}
140
141static inline int inat_is_vex_prefix(insn_attr_t attr)
142{
143 attr &= INAT_PFX_MASK;
144 return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
145}
146
147static inline int inat_is_vex3_prefix(insn_attr_t attr)
148{
149 return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
150}
151
152static inline int inat_is_escape(insn_attr_t attr)
153{
154 return attr & INAT_ESC_MASK;
155}
156
157static inline int inat_escape_id(insn_attr_t attr)
158{
159 return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
160}
161
162static inline int inat_is_group(insn_attr_t attr)
163{
164 return attr & INAT_GRP_MASK;
165}
166
167static inline int inat_group_id(insn_attr_t attr)
168{
169 return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
170}
171
172static inline int inat_group_common_attribute(insn_attr_t attr)
173{
174 return attr & ~INAT_GRP_MASK;
175}
176
177static inline int inat_has_immediate(insn_attr_t attr)
178{
179 return attr & INAT_IMM_MASK;
180}
181
182static inline int inat_immediate_size(insn_attr_t attr)
183{
184 return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
185}
186
187static inline int inat_has_modrm(insn_attr_t attr)
188{
189 return attr & INAT_MODRM;
190}
191
192static inline int inat_is_force64(insn_attr_t attr)
193{
194 return attr & INAT_FORCE64;
195}
196
197static inline int inat_has_second_immediate(insn_attr_t attr)
198{
199 return attr & INAT_SCNDIMM;
200}
201
202static inline int inat_has_moffset(insn_attr_t attr)
203{
204 return attr & INAT_MOFFSET;
205}
206
207static inline int inat_has_variant(insn_attr_t attr)
208{
209 return attr & INAT_VARIANT;
210}
211
212static inline int inat_accept_vex(insn_attr_t attr)
213{
214 return attr & INAT_VEXOK;
215}
216
217static inline int inat_must_vex(insn_attr_t attr)
218{
219 return attr & INAT_VEXONLY;
220}
221#endif
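Reading the layout from bit 0 upward: a 4-bit prefix id, a 2-bit escape id, a
5-bit group id (offset 6), a 3-bit immediate size (offset 11), then individual
flag bits from bit 14. A quick worked example of how the maker macros and the
checkers line up:

    insn_attr_t attr = INAT_MAKE_GROUP(5) | INAT_MAKE_IMM(INAT_IMM_BYTE);

    /* INAT_MAKE_GROUP(5) == (5 << 6) | INAT_MODRM, hence: */
    inat_is_group(attr);            /* non-zero            */
    inat_group_id(attr);            /* == 5                */
    inat_has_modrm(attr);           /* non-zero (implied)  */
    inat_immediate_size(attr);      /* == INAT_IMM_BYTE    */
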
diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h
new file mode 100644
index 000000000000..cb3c20ce39cf
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat_types.h
@@ -0,0 +1,29 @@
1#ifndef _ASM_X86_INAT_TYPES_H
2#define _ASM_X86_INAT_TYPES_H
3/*
4 * x86 instruction attributes
5 *
6 * Written by Masami Hiramatsu <mhiramat@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 *
22 */
23
24/* Instruction attributes */
25typedef unsigned int insn_attr_t;
26typedef unsigned char insn_byte_t;
27typedef signed int insn_value_t;
28
29#endif
diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c
new file mode 100644
index 000000000000..47314a64399c
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/insn.c
@@ -0,0 +1,594 @@
1/*
2 * x86 instruction analysis
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2002, 2004, 2009
19 */
20
21#ifdef __KERNEL__
22#include <linux/string.h>
23#else
24#include <string.h>
25#endif
26#include "inat.h"
27#include "insn.h"
28
29/* Verify that the next sizeof(t) + n bytes lie within the instruction buffer */
30#define validate_next(t, insn, n) \
31 ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
32
33#define __get_next(t, insn) \
34 ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
35
36#define __peek_nbyte_next(t, insn, n) \
37 ({ t r = *(t*)((insn)->next_byte + n); r; })
38
39#define get_next(t, insn) \
40 ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
41
42#define peek_nbyte_next(t, insn, n) \
43 ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
44
45#define peek_next(t, insn) peek_nbyte_next(t, insn, 0)
46
47/**
48 * insn_init() - initialize struct insn
49 * @insn: &struct insn to be initialized
50 * @kaddr: address (in kernel memory) of instruction (or copy thereof)
51 * @buf_len: length of the buffer at @kaddr
52 * @x86_64: !0 for 64-bit kernel or 64-bit app
52 */
53void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
54{
55 /*
56 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
57 * even if the input buffer is long enough to hold them.
58 */
59 if (buf_len > MAX_INSN_SIZE)
60 buf_len = MAX_INSN_SIZE;
61
62 memset(insn, 0, sizeof(*insn));
63 insn->kaddr = kaddr;
64 insn->end_kaddr = kaddr + buf_len;
65 insn->next_byte = kaddr;
66 insn->x86_64 = x86_64 ? 1 : 0;
67 insn->opnd_bytes = 4;
68 if (x86_64)
69 insn->addr_bytes = 8;
70 else
71 insn->addr_bytes = 4;
72}
73
74/**
75 * insn_get_prefixes - scan x86 instruction prefix bytes
76 * @insn: &struct insn containing instruction
77 *
78 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
79 * to point to the (first) opcode. No effect if @insn->prefixes.got
80 * is already set.
81 */
82void insn_get_prefixes(struct insn *insn)
83{
84 struct insn_field *prefixes = &insn->prefixes;
85 insn_attr_t attr;
86 insn_byte_t b, lb;
87 int i, nb;
88
89 if (prefixes->got)
90 return;
91
92 nb = 0;
93 lb = 0;
94 b = peek_next(insn_byte_t, insn);
95 attr = inat_get_opcode_attribute(b);
96 while (inat_is_legacy_prefix(attr)) {
97 /* Skip if same prefix */
98 for (i = 0; i < nb; i++)
99 if (prefixes->bytes[i] == b)
100 goto found;
101 if (nb == 4)
102 /* Invalid instruction */
103 break;
104 prefixes->bytes[nb++] = b;
105 if (inat_is_address_size_prefix(attr)) {
106 /* address size switches 2/4 or 4/8 */
107 if (insn->x86_64)
108 insn->addr_bytes ^= 12;
109 else
110 insn->addr_bytes ^= 6;
111 } else if (inat_is_operand_size_prefix(attr)) {
112 /* operand size switches 2/4 */
113 insn->opnd_bytes ^= 6;
114 }
115found:
116 prefixes->nbytes++;
117 insn->next_byte++;
118 lb = b;
119 b = peek_next(insn_byte_t, insn);
120 attr = inat_get_opcode_attribute(b);
121 }
122 /* Set the last prefix */
123 if (lb && lb != insn->prefixes.bytes[3]) {
124 if (unlikely(insn->prefixes.bytes[3])) {
125 /* Swap the last prefix */
126 b = insn->prefixes.bytes[3];
127 for (i = 0; i < nb; i++)
128 if (prefixes->bytes[i] == lb)
129 prefixes->bytes[i] = b;
130 }
131 insn->prefixes.bytes[3] = lb;
132 }
133
134 /* Decode REX prefix */
135 if (insn->x86_64) {
136 b = peek_next(insn_byte_t, insn);
137 attr = inat_get_opcode_attribute(b);
138 if (inat_is_rex_prefix(attr)) {
139 insn->rex_prefix.value = b;
140 insn->rex_prefix.nbytes = 1;
141 insn->next_byte++;
142 if (X86_REX_W(b))
143 /* REX.W overrides opnd_size */
144 insn->opnd_bytes = 8;
145 }
146 }
147 insn->rex_prefix.got = 1;
148
149 /* Decode VEX prefix */
150 b = peek_next(insn_byte_t, insn);
151 attr = inat_get_opcode_attribute(b);
152 if (inat_is_vex_prefix(attr)) {
153 insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
154 if (!insn->x86_64) {
155 /*
156 * In 32-bit mode, if the [7:6] bits (mod bits of
157 * ModRM) on the second byte are not 11b, it is
158 * LDS or LES.
159 */
160 if (X86_MODRM_MOD(b2) != 3)
161 goto vex_end;
162 }
163 insn->vex_prefix.bytes[0] = b;
164 insn->vex_prefix.bytes[1] = b2;
165 if (inat_is_vex3_prefix(attr)) {
166 b2 = peek_nbyte_next(insn_byte_t, insn, 2);
167 insn->vex_prefix.bytes[2] = b2;
168 insn->vex_prefix.nbytes = 3;
169 insn->next_byte += 3;
170 if (insn->x86_64 && X86_VEX_W(b2))
171 /* VEX.W overrides opnd_size */
172 insn->opnd_bytes = 8;
173 } else {
174 /*
175 * For VEX2, fake VEX3-like byte#2.
176 * Makes it easier to decode vex.W, vex.vvvv,
177 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
178 */
179 insn->vex_prefix.bytes[2] = b2 & 0x7f;
180 insn->vex_prefix.nbytes = 2;
181 insn->next_byte += 2;
182 }
183 }
184vex_end:
185 insn->vex_prefix.got = 1;
186
187 prefixes->got = 1;
188
189err_out:
190 return;
191}
192
193/**
194 * insn_get_opcode - collect opcode(s)
195 * @insn: &struct insn containing instruction
196 *
197 * Populates @insn->opcode, updates @insn->next_byte to point past the
198 * opcode byte(s), and sets @insn->attr (except for groups).
199 * If necessary, first collects any preceding (prefix) bytes.
200 * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
201 * is already 1.
202 */
203void insn_get_opcode(struct insn *insn)
204{
205 struct insn_field *opcode = &insn->opcode;
206 insn_byte_t op;
207 int pfx_id;
208 if (opcode->got)
209 return;
210 if (!insn->prefixes.got)
211 insn_get_prefixes(insn);
212
213 /* Get first opcode */
214 op = get_next(insn_byte_t, insn);
215 opcode->bytes[0] = op;
216 opcode->nbytes = 1;
217
218 /* Check if there is VEX prefix or not */
219 if (insn_is_avx(insn)) {
220 insn_byte_t m, p;
221 m = insn_vex_m_bits(insn);
222 p = insn_vex_p_bits(insn);
223 insn->attr = inat_get_avx_attribute(op, m, p);
224 if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
225 insn->attr = 0; /* This instruction is bad */
226 goto end; /* VEX has only 1 byte for opcode */
227 }
228
229 insn->attr = inat_get_opcode_attribute(op);
230 while (inat_is_escape(insn->attr)) {
231 /* Get escaped opcode */
232 op = get_next(insn_byte_t, insn);
233 opcode->bytes[opcode->nbytes++] = op;
234 pfx_id = insn_last_prefix_id(insn);
235 insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
236 }
237 if (inat_must_vex(insn->attr))
238 insn->attr = 0; /* This instruction is bad */
239end:
240 opcode->got = 1;
241
242err_out:
243 return;
244}
245
246/**
247 * insn_get_modrm - collect ModRM byte, if any
248 * @insn: &struct insn containing instruction
249 *
250 * Populates @insn->modrm and updates @insn->next_byte to point past the
251 * ModRM byte, if any. If necessary, first collects the preceding bytes
252 * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
253 */
254void insn_get_modrm(struct insn *insn)
255{
256 struct insn_field *modrm = &insn->modrm;
257 insn_byte_t pfx_id, mod;
258 if (modrm->got)
259 return;
260 if (!insn->opcode.got)
261 insn_get_opcode(insn);
262
263 if (inat_has_modrm(insn->attr)) {
264 mod = get_next(insn_byte_t, insn);
265 modrm->value = mod;
266 modrm->nbytes = 1;
267 if (inat_is_group(insn->attr)) {
268 pfx_id = insn_last_prefix_id(insn);
269 insn->attr = inat_get_group_attribute(mod, pfx_id,
270 insn->attr);
271 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
272 insn->attr = 0; /* This is bad */
273 }
274 }
275
276 if (insn->x86_64 && inat_is_force64(insn->attr))
277 insn->opnd_bytes = 8;
278 modrm->got = 1;
279
280err_out:
281 return;
282}
283
284
285/**
286 * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
287 * @insn: &struct insn containing instruction
288 *
289 * If necessary, first collects the instruction up to and including the
290 * ModRM byte. No effect if @insn->x86_64 is 0.
291 */
292int insn_rip_relative(struct insn *insn)
293{
294 struct insn_field *modrm = &insn->modrm;
295
296 if (!insn->x86_64)
297 return 0;
298 if (!modrm->got)
299 insn_get_modrm(insn);
300 /*
301 * For rip-relative instructions, the mod field (top 2 bits)
302 * is zero and the r/m field (bottom 3 bits) is 0x5.
303 */
304 return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
305}
306
307/**
308 * insn_get_sib() - Get the SIB byte of instruction
309 * @insn: &struct insn containing instruction
310 *
311 * If necessary, first collects the instruction up to and including the
312 * ModRM byte.
313 */
314void insn_get_sib(struct insn *insn)
315{
316 insn_byte_t modrm;
317
318 if (insn->sib.got)
319 return;
320 if (!insn->modrm.got)
321 insn_get_modrm(insn);
322 if (insn->modrm.nbytes) {
323 modrm = (insn_byte_t)insn->modrm.value;
324 if (insn->addr_bytes != 2 &&
325 X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
326 insn->sib.value = get_next(insn_byte_t, insn);
327 insn->sib.nbytes = 1;
328 }
329 }
330 insn->sib.got = 1;
331
332err_out:
333 return;
334}
335
336
337/**
338 * insn_get_displacement() - Get the displacement of instruction
339 * @insn: &struct insn containing instruction
340 *
341 * If necessary, first collects the instruction up to and including the
342 * SIB byte.
343 * Displacement value is sign-expanded.
344 */
345void insn_get_displacement(struct insn *insn)
346{
347 insn_byte_t mod, rm, base;
348
349 if (insn->displacement.got)
350 return;
351 if (!insn->sib.got)
352 insn_get_sib(insn);
353 if (insn->modrm.nbytes) {
354 /*
355 * Interpreting the modrm byte:
356 * mod = 00 - no displacement fields (exceptions below)
357 * mod = 01 - 1-byte displacement field
358 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
359 * address size = 2 (0x67 prefix in 32-bit mode)
360 * mod = 11 - no memory operand
361 *
362 * If address size = 2...
363 * mod = 00, r/m = 110 - displacement field is 2 bytes
364 *
365 * If address size != 2...
366 * mod != 11, r/m = 100 - SIB byte exists
367 * mod = 00, SIB base = 101 - displacement field is 4 bytes
368 * mod = 00, r/m = 101 - rip-relative addressing, displacement
369 * field is 4 bytes
370 */
371 mod = X86_MODRM_MOD(insn->modrm.value);
372 rm = X86_MODRM_RM(insn->modrm.value);
373 base = X86_SIB_BASE(insn->sib.value);
374 if (mod == 3)
375 goto out;
376 if (mod == 1) {
377 insn->displacement.value = get_next(char, insn);
378 insn->displacement.nbytes = 1;
379 } else if (insn->addr_bytes == 2) {
380 if ((mod == 0 && rm == 6) || mod == 2) {
381 insn->displacement.value =
382 get_next(short, insn);
383 insn->displacement.nbytes = 2;
384 }
385 } else {
386 if ((mod == 0 && rm == 5) || mod == 2 ||
387 (mod == 0 && base == 5)) {
388 insn->displacement.value = get_next(int, insn);
389 insn->displacement.nbytes = 4;
390 }
391 }
392 }
393out:
394 insn->displacement.got = 1;
395
396err_out:
397 return;
398}
399
400/* Decode moffset16/32/64. Return 0 if failed */
401static int __get_moffset(struct insn *insn)
402{
403 switch (insn->addr_bytes) {
404 case 2:
405 insn->moffset1.value = get_next(short, insn);
406 insn->moffset1.nbytes = 2;
407 break;
408 case 4:
409 insn->moffset1.value = get_next(int, insn);
410 insn->moffset1.nbytes = 4;
411 break;
412 case 8:
413 insn->moffset1.value = get_next(int, insn);
414 insn->moffset1.nbytes = 4;
415 insn->moffset2.value = get_next(int, insn);
416 insn->moffset2.nbytes = 4;
417 break;
418 default: /* opnd_bytes must be modified manually */
419 goto err_out;
420 }
421 insn->moffset1.got = insn->moffset2.got = 1;
422
423 return 1;
424
425err_out:
426 return 0;
427}
428
429/* Decode imm v32(Iz). Return 0 if failed */
430static int __get_immv32(struct insn *insn)
431{
432 switch (insn->opnd_bytes) {
433 case 2:
434 insn->immediate.value = get_next(short, insn);
435 insn->immediate.nbytes = 2;
436 break;
437 case 4:
438 case 8:
439 insn->immediate.value = get_next(int, insn);
440 insn->immediate.nbytes = 4;
441 break;
442 default: /* opnd_bytes must be modified manually */
443 goto err_out;
444 }
445
446 return 1;
447
448err_out:
449 return 0;
450}
451
452/* Decode imm v64(Iv/Ov), Return 0 if failed */
453static int __get_immv(struct insn *insn)
454{
455 switch (insn->opnd_bytes) {
456 case 2:
457 insn->immediate1.value = get_next(short, insn);
458 insn->immediate1.nbytes = 2;
459 break;
460 case 4:
461 insn->immediate1.value = get_next(int, insn);
462 insn->immediate1.nbytes = 4;
463 break;
464 case 8:
465 insn->immediate1.value = get_next(int, insn);
466 insn->immediate1.nbytes = 4;
467 insn->immediate2.value = get_next(int, insn);
468 insn->immediate2.nbytes = 4;
469 break;
470 default: /* opnd_bytes must be modified manually */
471 goto err_out;
472 }
473 insn->immediate1.got = insn->immediate2.got = 1;
474
475 return 1;
476err_out:
477 return 0;
478}
479
480/* Decode ptr16:16/32(Ap) */
481static int __get_immptr(struct insn *insn)
482{
483 switch (insn->opnd_bytes) {
484 case 2:
485 insn->immediate1.value = get_next(short, insn);
486 insn->immediate1.nbytes = 2;
487 break;
488 case 4:
489 insn->immediate1.value = get_next(int, insn);
490 insn->immediate1.nbytes = 4;
491 break;
492 case 8:
493 /* ptr16:64 does not exist (no segment) */
494 return 0;
495 default: /* opnd_bytes must be modified manually */
496 goto err_out;
497 }
498 insn->immediate2.value = get_next(unsigned short, insn);
499 insn->immediate2.nbytes = 2;
500 insn->immediate1.got = insn->immediate2.got = 1;
501
502 return 1;
503err_out:
504 return 0;
505}
506
507/**
508 * insn_get_immediate() - Get the immediates of instruction
509 * @insn: &struct insn containing instruction
510 *
511 * If necessary, first collects the instruction up to and including the
512 * displacement bytes.
513 * Most immediates are sign-expanded. The unsigned value can be
514 * recovered by masking with ((1 << (nbytes * 8)) - 1)
515 */
516void insn_get_immediate(struct insn *insn)
517{
518 if (insn->immediate.got)
519 return;
520 if (!insn->displacement.got)
521 insn_get_displacement(insn);
522
523 if (inat_has_moffset(insn->attr)) {
524 if (!__get_moffset(insn))
525 goto err_out;
526 goto done;
527 }
528
529 if (!inat_has_immediate(insn->attr))
530 /* no immediates */
531 goto done;
532
533 switch (inat_immediate_size(insn->attr)) {
534 case INAT_IMM_BYTE:
535 insn->immediate.value = get_next(char, insn);
536 insn->immediate.nbytes = 1;
537 break;
538 case INAT_IMM_WORD:
539 insn->immediate.value = get_next(short, insn);
540 insn->immediate.nbytes = 2;
541 break;
542 case INAT_IMM_DWORD:
543 insn->immediate.value = get_next(int, insn);
544 insn->immediate.nbytes = 4;
545 break;
546 case INAT_IMM_QWORD:
547 insn->immediate1.value = get_next(int, insn);
548 insn->immediate1.nbytes = 4;
549 insn->immediate2.value = get_next(int, insn);
550 insn->immediate2.nbytes = 4;
551 break;
552 case INAT_IMM_PTR:
553 if (!__get_immptr(insn))
554 goto err_out;
555 break;
556 case INAT_IMM_VWORD32:
557 if (!__get_immv32(insn))
558 goto err_out;
559 break;
560 case INAT_IMM_VWORD:
561 if (!__get_immv(insn))
562 goto err_out;
563 break;
564 default:
565 /* Here, insn must have an immediate, but failed */
566 goto err_out;
567 }
568 if (inat_has_second_immediate(insn->attr)) {
569 insn->immediate2.value = get_next(char, insn);
570 insn->immediate2.nbytes = 1;
571 }
572done:
573 insn->immediate.got = 1;
574
575err_out:
576 return;
577}
578
579/**
580 * insn_get_length() - Get the length of instruction
581 * @insn: &struct insn containing instruction
582 *
583 * If necessary, first collects the instruction up to and including the
584 * immediates bytes.
585 */
586void insn_get_length(struct insn *insn)
587{
588 if (insn->length)
589 return;
590 if (!insn->immediate.got)
591 insn_get_immediate(insn);
592 insn->length = (unsigned char)((unsigned long)insn->next_byte
593 - (unsigned long)insn->kaddr);
594}
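Putting the decoder together: insn_init() is followed by the insn_get_xxx()
of the deepest field needed, and insn_get_length() pulls in everything. A
minimal usage sketch (userspace build, 64-bit code; includes and error
handling omitted):

    struct insn insn;
    unsigned char buf[MAX_INSN_SIZE] = { 0x48, 0x8b, 0x45, 0xf8 }; /* mov rax,[rbp-8] */

    insn_init(&insn, buf, sizeof(buf), /*x86_64=*/1);
    insn_get_length(&insn);    /* prefixes, opcode, ModRM, SIB, disp, imm */
    if (insn_complete(&insn))
            printf("%u-byte insn, opcode 0x%02x\n",
                   insn.length, insn.opcode.bytes[0]);
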
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
new file mode 100644
index 000000000000..dd12da0f4593
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -0,0 +1,201 @@
1#ifndef _ASM_X86_INSN_H
2#define _ASM_X86_INSN_H
3/*
4 * x86 instruction analysis
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 * Copyright (C) IBM Corporation, 2009
21 */
22
23/* insn_attr_t is defined in inat.h */
24#include "inat.h"
25
26struct insn_field {
27 union {
28 insn_value_t value;
29 insn_byte_t bytes[4];
30 };
31 /* !0 if we've run insn_get_xxx() for this field */
32 unsigned char got;
33 unsigned char nbytes;
34};
35
36struct insn {
37 struct insn_field prefixes; /*
38 * Prefixes
39 * prefixes.bytes[3]: last prefix
40 */
41 struct insn_field rex_prefix; /* REX prefix */
42 struct insn_field vex_prefix; /* VEX prefix */
43 struct insn_field opcode; /*
44 * opcode.bytes[0]: opcode1
45 * opcode.bytes[1]: opcode2
46 * opcode.bytes[2]: opcode3
47 */
48 struct insn_field modrm;
49 struct insn_field sib;
50 struct insn_field displacement;
51 union {
52 struct insn_field immediate;
53 struct insn_field moffset1; /* for 64bit MOV */
54 struct insn_field immediate1; /* for 64bit imm or off16/32 */
55 };
56 union {
57 struct insn_field moffset2; /* for 64bit MOV */
58 struct insn_field immediate2; /* for 64bit imm or seg16 */
59 };
60
61 insn_attr_t attr;
62 unsigned char opnd_bytes;
63 unsigned char addr_bytes;
64 unsigned char length;
65 unsigned char x86_64;
66
67 const insn_byte_t *kaddr; /* kernel address of insn to analyze */
68 const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
69 const insn_byte_t *next_byte;
70};
71
72#define MAX_INSN_SIZE 15
73
74#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
75#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
76#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
77
78#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
79#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
80#define X86_SIB_BASE(sib) ((sib) & 0x07)
81
82#define X86_REX_W(rex) ((rex) & 8)
83#define X86_REX_R(rex) ((rex) & 4)
84#define X86_REX_X(rex) ((rex) & 2)
85#define X86_REX_B(rex) ((rex) & 1)
86
87/* VEX bit flags */
88#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
89#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */
90#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */
91#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
92#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
93/* VEX bit fields */
94#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
95#define X86_VEX2_M 1 /* VEX2.M always 1 */
96#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
97#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
98#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
99
100extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
101extern void insn_get_prefixes(struct insn *insn);
102extern void insn_get_opcode(struct insn *insn);
103extern void insn_get_modrm(struct insn *insn);
104extern void insn_get_sib(struct insn *insn);
105extern void insn_get_displacement(struct insn *insn);
106extern void insn_get_immediate(struct insn *insn);
107extern void insn_get_length(struct insn *insn);
108
109/* Attribute will be determined after getting ModRM (for opcode groups) */
110static inline void insn_get_attribute(struct insn *insn)
111{
112 insn_get_modrm(insn);
113}
114
115/* Instruction uses RIP-relative addressing */
116extern int insn_rip_relative(struct insn *insn);
117
118/* Init insn for kernel text */
119static inline void kernel_insn_init(struct insn *insn,
120 const void *kaddr, int buf_len)
121{
122#ifdef CONFIG_X86_64
123 insn_init(insn, kaddr, buf_len, 1);
124#else /* CONFIG_X86_32 */
125 insn_init(insn, kaddr, buf_len, 0);
126#endif
127}
128
129static inline int insn_is_avx(struct insn *insn)
130{
131 if (!insn->prefixes.got)
132 insn_get_prefixes(insn);
133 return (insn->vex_prefix.value != 0);
134}
135
136/* Ensure this instruction is decoded completely */
137static inline int insn_complete(struct insn *insn)
138{
139 return insn->opcode.got && insn->modrm.got && insn->sib.got &&
140 insn->displacement.got && insn->immediate.got;
141}
142
143static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
144{
145 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
146 return X86_VEX2_M;
147 else
148 return X86_VEX3_M(insn->vex_prefix.bytes[1]);
149}
150
151static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
152{
153 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
154 return X86_VEX_P(insn->vex_prefix.bytes[1]);
155 else
156 return X86_VEX_P(insn->vex_prefix.bytes[2]);
157}
158
159/* Get the last prefix id from last prefix or VEX prefix */
160static inline int insn_last_prefix_id(struct insn *insn)
161{
162 if (insn_is_avx(insn))
163 return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
164
165 if (insn->prefixes.bytes[3])
166 return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
167
168 return 0;
169}
170
171/* Offset of each field from kaddr */
172static inline int insn_offset_rex_prefix(struct insn *insn)
173{
174 return insn->prefixes.nbytes;
175}
176static inline int insn_offset_vex_prefix(struct insn *insn)
177{
178 return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
179}
180static inline int insn_offset_opcode(struct insn *insn)
181{
182 return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
183}
184static inline int insn_offset_modrm(struct insn *insn)
185{
186 return insn_offset_opcode(insn) + insn->opcode.nbytes;
187}
188static inline int insn_offset_sib(struct insn *insn)
189{
190 return insn_offset_modrm(insn) + insn->modrm.nbytes;
191}
192static inline int insn_offset_displacement(struct insn *insn)
193{
194 return insn_offset_sib(insn) + insn->sib.nbytes;
195}
196static inline int insn_offset_immediate(struct insn *insn)
197{
198 return insn_offset_displacement(insn) + insn->displacement.nbytes;
199}
200
201#endif /* _ASM_X86_INSN_H */
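As a small worked example of the field macros: in the encoding 48 8b 45 f8
(mov rax,[rbp-8]) the ModRM byte is 0x45 = 01 000 101b, so:

    int mod = X86_MODRM_MOD(0x45);  /* 1: disp8 follows         */
    int reg = X86_MODRM_REG(0x45);  /* 0: %rax (with REX.W set) */
    int rm  = X86_MODRM_RM(0x45);   /* 5: %rbp-based addressing */
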
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
new file mode 100644
index 000000000000..22ba50224319
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -0,0 +1,2345 @@
1/*
2 * intel-pt-decoder.c: Intel Processor Trace support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef _GNU_SOURCE
17#define _GNU_SOURCE
18#endif
19#include <stdlib.h>
20#include <stdbool.h>
21#include <string.h>
22#include <errno.h>
23#include <stdint.h>
24#include <inttypes.h>
25
26#include "../cache.h"
27#include "../util.h"
28
29#include "intel-pt-insn-decoder.h"
30#include "intel-pt-pkt-decoder.h"
31#include "intel-pt-decoder.h"
32#include "intel-pt-log.h"
33
34#define INTEL_PT_BLK_SIZE 1024
35
36#define BIT63 (((uint64_t)1 << 63))
37
38#define INTEL_PT_RETURN 1
39
40/* Maximum number of loops with no packets consumed, i.e. stuck in a loop */
41#define INTEL_PT_MAX_LOOPS 10000
42
43struct intel_pt_blk {
44 struct intel_pt_blk *prev;
45 uint64_t ip[INTEL_PT_BLK_SIZE];
46};
47
48struct intel_pt_stack {
49 struct intel_pt_blk *blk;
50 struct intel_pt_blk *spare;
51 int pos;
52};
53
54enum intel_pt_pkt_state {
55 INTEL_PT_STATE_NO_PSB,
56 INTEL_PT_STATE_NO_IP,
57 INTEL_PT_STATE_ERR_RESYNC,
58 INTEL_PT_STATE_IN_SYNC,
59 INTEL_PT_STATE_TNT,
60 INTEL_PT_STATE_TIP,
61 INTEL_PT_STATE_TIP_PGD,
62 INTEL_PT_STATE_FUP,
63 INTEL_PT_STATE_FUP_NO_TIP,
64};
65
66#ifdef INTEL_PT_STRICT
67#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
68#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
69#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB
70#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB
71#else
72#define INTEL_PT_STATE_ERR1 (decoder->pkt_state)
73#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP
74#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC
75#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC
76#endif
77
78struct intel_pt_decoder {
79 int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
80 int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
81 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
82 uint64_t max_insn_cnt, void *data);
83 void *data;
84 struct intel_pt_state state;
85 const unsigned char *buf;
86 size_t len;
87 bool return_compression;
88 bool mtc_insn;
89 bool pge;
90 bool have_tma;
91 bool have_cyc;
92 uint64_t pos;
93 uint64_t last_ip;
94 uint64_t ip;
95 uint64_t cr3;
96 uint64_t timestamp;
97 uint64_t tsc_timestamp;
98 uint64_t ref_timestamp;
99 uint64_t ret_addr;
100 uint64_t ctc_timestamp;
101 uint64_t ctc_delta;
102 uint64_t cycle_cnt;
103 uint64_t cyc_ref_timestamp;
104 uint32_t last_mtc;
105 uint32_t tsc_ctc_ratio_n;
106 uint32_t tsc_ctc_ratio_d;
107 uint32_t tsc_ctc_mult;
108 uint32_t tsc_slip;
109 uint32_t ctc_rem_mask;
110 int mtc_shift;
111 struct intel_pt_stack stack;
112 enum intel_pt_pkt_state pkt_state;
113 struct intel_pt_pkt packet;
114 struct intel_pt_pkt tnt;
115 int pkt_step;
116 int pkt_len;
117 int last_packet_type;
118 unsigned int cbr;
119 unsigned int max_non_turbo_ratio;
120 double max_non_turbo_ratio_fp;
121 double cbr_cyc_to_tsc;
122 double calc_cyc_to_tsc;
123 bool have_calc_cyc_to_tsc;
124 int exec_mode;
125 unsigned int insn_bytes;
126 uint64_t sign_bit;
127 uint64_t sign_bits;
128 uint64_t period;
129 enum intel_pt_period_type period_type;
130 uint64_t tot_insn_cnt;
131 uint64_t period_insn_cnt;
132 uint64_t period_mask;
133 uint64_t period_ticks;
134 uint64_t last_masked_timestamp;
135 bool continuous_period;
136 bool overflow;
137 bool set_fup_tx_flags;
138 unsigned int fup_tx_flags;
139 unsigned int tx_flags;
140 uint64_t timestamp_insn_cnt;
141 uint64_t stuck_ip;
142 int no_progress;
143 int stuck_ip_prd;
144 int stuck_ip_cnt;
145 const unsigned char *next_buf;
146 size_t next_len;
147 unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
148};
149
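/*
 * Example: intel_pt_lower_power_of_2(0x28) shifts 0x28 right 5 times to
 * reach 1 and returns 1 << 5 = 0x20, i.e. the largest power of 2 <= x.
 */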
150static uint64_t intel_pt_lower_power_of_2(uint64_t x)
151{
152 int i;
153
154 for (i = 0; x != 1; i++)
155 x >>= 1;
156
157 return x << i;
158}
159
160static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
161{
162 if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
163 uint64_t period;
164
165 period = intel_pt_lower_power_of_2(decoder->period);
166 decoder->period_mask = ~(period - 1);
167 decoder->period_ticks = period;
168 }
169}
170
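/*
 * multdiv() computes t * n / d without forming the intermediate t * n,
 * which could overflow 64 bits for large timestamps, e.g.
 * multdiv(10, 3, 4) = (10/4)*3 + ((10%4)*3)/4 = 6 + 1 = 7.
 */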
171static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
172{
173 if (!d)
174 return 0;
175 return (t / d) * n + ((t % d) * n) / d;
176}
177
178struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
179{
180 struct intel_pt_decoder *decoder;
181
182 if (!params->get_trace || !params->walk_insn)
183 return NULL;
184
185 decoder = zalloc(sizeof(struct intel_pt_decoder));
186 if (!decoder)
187 return NULL;
188
189 decoder->get_trace = params->get_trace;
190 decoder->walk_insn = params->walk_insn;
191 decoder->data = params->data;
192 decoder->return_compression = params->return_compression;
193
194 decoder->sign_bit = (uint64_t)1 << 47;
195 decoder->sign_bits = ~(((uint64_t)1 << 48) - 1);
196
197 decoder->period = params->period;
198 decoder->period_type = params->period_type;
199
200 decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
201 decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
202
203 intel_pt_setup_period(decoder);
204
205 decoder->mtc_shift = params->mtc_period;
206 decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1;
207
208 decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n;
209 decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d;
210
211 if (!decoder->tsc_ctc_ratio_n)
212 decoder->tsc_ctc_ratio_d = 0;
213
214 if (decoder->tsc_ctc_ratio_d) {
215 if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
216 decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
217 decoder->tsc_ctc_ratio_d;
218
219 /*
220 * Allow for timestamps appearing to go backwards because a TSC
221 * packet has slipped past an MTC packet, so allow 2 MTC ticks
222 * or ...
223 */
224 decoder->tsc_slip = multdiv(2 << decoder->mtc_shift,
225 decoder->tsc_ctc_ratio_n,
226 decoder->tsc_ctc_ratio_d);
227 }
228 /* ... or 0x100 paranoia */
229 if (decoder->tsc_slip < 0x100)
230 decoder->tsc_slip = 0x100;
231
232 intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
233 intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
234 intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d);
235 intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
236 intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
237
238 return decoder;
239}
240
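/*
 * The return-compression call stack is a linked list of fixed-size
 * blocks of INTEL_PT_BLK_SIZE IPs; one empty block is cached in 'spare'
 * so that call/return traffic around a block boundary does not
 * malloc/free on every push/pop.
 */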
241static void intel_pt_pop_blk(struct intel_pt_stack *stack)
242{
243 struct intel_pt_blk *blk = stack->blk;
244
245 stack->blk = blk->prev;
246 if (!stack->spare)
247 stack->spare = blk;
248 else
249 free(blk);
250}
251
252static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
253{
254 if (!stack->pos) {
255 if (!stack->blk)
256 return 0;
257 intel_pt_pop_blk(stack);
258 if (!stack->blk)
259 return 0;
260 stack->pos = INTEL_PT_BLK_SIZE;
261 }
262 return stack->blk->ip[--stack->pos];
263}
264
265static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
266{
267 struct intel_pt_blk *blk;
268
269 if (stack->spare) {
270 blk = stack->spare;
271 stack->spare = NULL;
272 } else {
273 blk = malloc(sizeof(struct intel_pt_blk));
274 if (!blk)
275 return -ENOMEM;
276 }
277
278 blk->prev = stack->blk;
279 stack->blk = blk;
280 stack->pos = 0;
281 return 0;
282}
283
284static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
285{
286 int err;
287
288 if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
289 err = intel_pt_alloc_blk(stack);
290 if (err)
291 return err;
292 }
293
294 stack->blk->ip[stack->pos++] = ip;
295 return 0;
296}
297
298static void intel_pt_clear_stack(struct intel_pt_stack *stack)
299{
300 while (stack->blk)
301 intel_pt_pop_blk(stack);
302 stack->pos = 0;
303}
304
305static void intel_pt_free_stack(struct intel_pt_stack *stack)
306{
307 intel_pt_clear_stack(stack);
308 zfree(&stack->blk);
309 zfree(&stack->spare);
310}
311
312void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
313{
314 intel_pt_free_stack(&decoder->stack);
315 free(decoder);
316}
317
318static int intel_pt_ext_err(int code)
319{
320 switch (code) {
321 case -ENOMEM:
322 return INTEL_PT_ERR_NOMEM;
323 case -ENOSYS:
324 return INTEL_PT_ERR_INTERN;
325 case -EBADMSG:
326 return INTEL_PT_ERR_BADPKT;
327 case -ENODATA:
328 return INTEL_PT_ERR_NODATA;
329 case -EILSEQ:
330 return INTEL_PT_ERR_NOINSN;
331 case -ENOENT:
332 return INTEL_PT_ERR_MISMAT;
333 case -EOVERFLOW:
334 return INTEL_PT_ERR_OVR;
335 case -ENOSPC:
336 return INTEL_PT_ERR_LOST;
337 case -ELOOP:
338 return INTEL_PT_ERR_NELOOP;
339 default:
340 return INTEL_PT_ERR_UNK;
341 }
342}
343
344static const char *intel_pt_err_msgs[] = {
345 [INTEL_PT_ERR_NOMEM] = "Memory allocation failed",
346 [INTEL_PT_ERR_INTERN] = "Internal error",
347 [INTEL_PT_ERR_BADPKT] = "Bad packet",
348 [INTEL_PT_ERR_NODATA] = "No more data",
349 [INTEL_PT_ERR_NOINSN] = "Failed to get instruction",
350 [INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction",
351 [INTEL_PT_ERR_OVR] = "Overflow packet",
352 [INTEL_PT_ERR_LOST] = "Lost trace data",
353 [INTEL_PT_ERR_UNK] = "Unknown error!",
354 [INTEL_PT_ERR_NELOOP] = "Never-ending loop",
355};
356
357int intel_pt__strerror(int code, char *buf, size_t buflen)
358{
359 if (code < 1 || code > INTEL_PT_ERR_MAX)
360 code = INTEL_PT_ERR_UNK;
361 strlcpy(buf, intel_pt_err_msgs[code], buflen);
362 return 0;
363}
364
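/*
 * IP compression: a TIP/FUP payload carries only the low 2, 4 or 6
 * bytes of the IP and the remaining high bytes are reused from the
 * previous IP, e.g. last_ip 0xffffffff81000000 with a 2-byte payload
 * 0x1234 yields 0xffffffff81001234. Bit 47 is then sign-extended so
 * that kernel addresses stay canonical.
 */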
365static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
366 const struct intel_pt_pkt *packet,
367 uint64_t last_ip)
368{
369 uint64_t ip;
370
371 switch (packet->count) {
372 case 2:
373 ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
374 packet->payload;
375 break;
376 case 4:
377 ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
378 packet->payload;
379 break;
380 case 6:
381 ip = packet->payload;
382 break;
383 default:
384 return 0;
385 }
386
387 if (ip & decoder->sign_bit)
388 return ip | decoder->sign_bits;
389
390 return ip;
391}
392
393static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
394{
395 decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
396 decoder->last_ip);
397}
398
399static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
400{
401 intel_pt_set_last_ip(decoder);
402 decoder->ip = decoder->last_ip;
403}
404
405static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
406{
407 intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
408 decoder->buf);
409}
410
411static int intel_pt_bug(struct intel_pt_decoder *decoder)
412{
413 intel_pt_log("ERROR: Internal error\n");
414 decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
415 return -ENOSYS;
416}
417
418static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
419{
420 decoder->tx_flags = 0;
421}
422
423static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
424{
425 decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
426}
427
428static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
429{
430 intel_pt_clear_tx_flags(decoder);
431 decoder->have_tma = false;
432 decoder->pkt_len = 1;
433 decoder->pkt_step = 1;
434 intel_pt_decoder_log_packet(decoder);
435 if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
436 intel_pt_log("ERROR: Bad packet\n");
437 decoder->pkt_state = INTEL_PT_STATE_ERR1;
438 }
439 return -EBADMSG;
440}
441
442static int intel_pt_get_data(struct intel_pt_decoder *decoder)
443{
444 struct intel_pt_buffer buffer = { .buf = NULL, };
445 int ret;
446
447 decoder->pkt_step = 0;
448
449 intel_pt_log("Getting more data\n");
450 ret = decoder->get_trace(&buffer, decoder->data);
451 if (ret)
452 return ret;
453 decoder->buf = buffer.buf;
454 decoder->len = buffer.len;
455 if (!decoder->len) {
456 intel_pt_log("No more data\n");
457 return -ENODATA;
458 }
459 if (!buffer.consecutive) {
460 decoder->ip = 0;
461 decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
462 decoder->ref_timestamp = buffer.ref_timestamp;
463 decoder->timestamp = 0;
464 decoder->have_tma = false;
465 decoder->state.trace_nr = buffer.trace_nr;
466 intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
467 decoder->ref_timestamp);
468 return -ENOLINK;
469 }
470
471 return 0;
472}
473
474static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
475{
476 if (!decoder->next_buf)
477 return intel_pt_get_data(decoder);
478
479 decoder->buf = decoder->next_buf;
480 decoder->len = decoder->next_len;
481 decoder->next_buf = NULL;
482 decoder->next_len = 0;
483 return 0;
484}
485
486static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
487{
488 unsigned char *buf = decoder->temp_buf;
489 size_t old_len, len, n;
490 int ret;
491
492 old_len = decoder->len;
493 len = decoder->len;
494 memcpy(buf, decoder->buf, len);
495
496 ret = intel_pt_get_data(decoder);
497 if (ret) {
498 decoder->pos += old_len;
499 return ret < 0 ? ret : -EINVAL;
500 }
501
502 n = INTEL_PT_PKT_MAX_SZ - len;
503 if (n > decoder->len)
504 n = decoder->len;
505 memcpy(buf + len, decoder->buf, n);
506 len += n;
507
508 ret = intel_pt_get_packet(buf, len, &decoder->packet);
509 if (ret < (int)old_len) {
510 decoder->next_buf = decoder->buf;
511 decoder->next_len = decoder->len;
512 decoder->buf = buf;
513 decoder->len = old_len;
514 return intel_pt_bad_packet(decoder);
515 }
516
517 decoder->next_buf = decoder->buf + (ret - old_len);
518 decoder->next_len = decoder->len - (ret - old_len);
519
520 decoder->buf = buf;
521 decoder->len = ret;
522
523 return ret;
524}
525
526struct intel_pt_pkt_info {
527 struct intel_pt_decoder *decoder;
528 struct intel_pt_pkt packet;
529 uint64_t pos;
530 int pkt_len;
531 int last_packet_type;
532 void *data;
533};
534
535typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);
536
537/* Lookahead packets in current buffer */
538static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
539 intel_pt_pkt_cb_t cb, void *data)
540{
541 struct intel_pt_pkt_info pkt_info;
542 const unsigned char *buf = decoder->buf;
543 size_t len = decoder->len;
544 int ret;
545
546 pkt_info.decoder = decoder;
547 pkt_info.pos = decoder->pos;
548 pkt_info.pkt_len = decoder->pkt_step;
549 pkt_info.last_packet_type = decoder->last_packet_type;
550 pkt_info.data = data;
551
552 while (1) {
553 do {
554 pkt_info.pos += pkt_info.pkt_len;
555 buf += pkt_info.pkt_len;
556 len -= pkt_info.pkt_len;
557
558 if (!len)
559 return INTEL_PT_NEED_MORE_BYTES;
560
561 ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
562 if (!ret)
563 return INTEL_PT_NEED_MORE_BYTES;
564 if (ret < 0)
565 return ret;
566
567 pkt_info.pkt_len = ret;
568 } while (pkt_info.packet.type == INTEL_PT_PAD);
569
570 ret = cb(&pkt_info);
571 if (ret)
572 return 0;
573
574 pkt_info.last_packet_type = pkt_info.packet.type;
575 }
576}
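/*
 * Editor's sketch of the callback contract above: the walk continues while
 * the callback returns zero and stops when it returns non-zero (the walk
 * itself then returns 0). A hypothetical callback counting TNT packets in
 * the rest of the buffer:
 *
 *	static int count_tnt_cb(struct intel_pt_pkt_info *pkt_info)
 *	{
 *		unsigned int *cnt = pkt_info->data;
 *
 *		if (pkt_info->packet.type == INTEL_PT_TNT)
 *			(*cnt)++;
 *		return 0;	// zero => keep walking
 *	}
 *
 *	unsigned int cnt = 0;
 *
 *	intel_pt_pkt_lookahead(decoder, count_tnt_cb, &cnt);
 */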
577
578struct intel_pt_calc_cyc_to_tsc_info {
579 uint64_t cycle_cnt;
580 unsigned int cbr;
581 uint32_t last_mtc;
582 uint64_t ctc_timestamp;
583 uint64_t ctc_delta;
584 uint64_t tsc_timestamp;
585 uint64_t timestamp;
586 bool have_tma;
587 bool from_mtc;
588 double cbr_cyc_to_tsc;
589};
590
591static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
592{
593 struct intel_pt_decoder *decoder = pkt_info->decoder;
594 struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
595 uint64_t timestamp;
596 double cyc_to_tsc;
597 unsigned int cbr;
598 uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
599
600 switch (pkt_info->packet.type) {
601 case INTEL_PT_TNT:
602 case INTEL_PT_TIP_PGE:
603 case INTEL_PT_TIP:
604 case INTEL_PT_FUP:
605 case INTEL_PT_PSB:
606 case INTEL_PT_PIP:
607 case INTEL_PT_MODE_EXEC:
608 case INTEL_PT_MODE_TSX:
609 case INTEL_PT_PSBEND:
610 case INTEL_PT_PAD:
611 case INTEL_PT_VMCS:
612 case INTEL_PT_MNT:
613 return 0;
614
615 case INTEL_PT_MTC:
616 if (!data->have_tma)
617 return 0;
618
619 mtc = pkt_info->packet.payload;
620 if (mtc > data->last_mtc)
621 mtc_delta = mtc - data->last_mtc;
622 else
623 mtc_delta = mtc + 256 - data->last_mtc;
624 data->ctc_delta += mtc_delta << decoder->mtc_shift;
625 data->last_mtc = mtc;
626
627 if (decoder->tsc_ctc_mult) {
628 timestamp = data->ctc_timestamp +
629 data->ctc_delta * decoder->tsc_ctc_mult;
630 } else {
631 timestamp = data->ctc_timestamp +
632 multdiv(data->ctc_delta,
633 decoder->tsc_ctc_ratio_n,
634 decoder->tsc_ctc_ratio_d);
635 }
636
637 if (timestamp < data->timestamp)
638 return 1;
639
640 if (pkt_info->last_packet_type != INTEL_PT_CYC) {
641 data->timestamp = timestamp;
642 return 0;
643 }
644
645 break;
646
647 case INTEL_PT_TSC:
648 timestamp = pkt_info->packet.payload |
649 (data->timestamp & (0xffULL << 56));
650 if (data->from_mtc && timestamp < data->timestamp &&
651 data->timestamp - timestamp < decoder->tsc_slip)
652 return 1;
653 while (timestamp < data->timestamp)
654 timestamp += (1ULL << 56);
655 if (pkt_info->last_packet_type != INTEL_PT_CYC) {
656 if (data->from_mtc)
657 return 1;
658 data->tsc_timestamp = timestamp;
659 data->timestamp = timestamp;
660 return 0;
661 }
662 break;
663
664 case INTEL_PT_TMA:
665 if (data->from_mtc)
666 return 1;
667
668 if (!decoder->tsc_ctc_ratio_d)
669 return 0;
670
671 ctc = pkt_info->packet.payload;
672 fc = pkt_info->packet.count;
673 ctc_rem = ctc & decoder->ctc_rem_mask;
674
675 data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
676
677 data->ctc_timestamp = data->tsc_timestamp - fc;
678 if (decoder->tsc_ctc_mult) {
679 data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
680 } else {
681 data->ctc_timestamp -=
682 multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
683 decoder->tsc_ctc_ratio_d);
684 }
685
686 data->ctc_delta = 0;
687 data->have_tma = true;
688
689 return 0;
690
691 case INTEL_PT_CYC:
692 data->cycle_cnt += pkt_info->packet.payload;
693 return 0;
694
695 case INTEL_PT_CBR:
696 cbr = pkt_info->packet.payload;
697 if (data->cbr && data->cbr != cbr)
698 return 1;
699 data->cbr = cbr;
700 data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
701 return 0;
702
703 case INTEL_PT_TIP_PGD:
704 case INTEL_PT_TRACESTOP:
705 case INTEL_PT_OVF:
706 case INTEL_PT_BAD: /* Does not happen */
707 default:
708 return 1;
709 }
710
711 if (!data->cbr && decoder->cbr) {
712 data->cbr = decoder->cbr;
713 data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
714 }
715
716 if (!data->cycle_cnt)
717 return 1;
718
719 cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;
720
721 if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
722 cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {
723 intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
724 cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
725 return 1;
726 }
727
728 decoder->calc_cyc_to_tsc = cyc_to_tsc;
729 decoder->have_calc_cyc_to_tsc = true;
730
731 if (data->cbr) {
732 intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
733 cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
734 } else {
735 intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
736 cyc_to_tsc, pkt_info->pos);
737 }
738
739 return 1;
740}
741
742static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
743 bool from_mtc)
744{
745 struct intel_pt_calc_cyc_to_tsc_info data = {
746 .cycle_cnt = 0,
747 .cbr = 0,
748 .last_mtc = decoder->last_mtc,
749 .ctc_timestamp = decoder->ctc_timestamp,
750 .ctc_delta = decoder->ctc_delta,
751 .tsc_timestamp = decoder->tsc_timestamp,
752 .timestamp = decoder->timestamp,
753 .have_tma = decoder->have_tma,
754 .from_mtc = from_mtc,
755 .cbr_cyc_to_tsc = 0,
756 };
757
758 intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
759}
760
761static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
762{
763 int ret;
764
765 decoder->last_packet_type = decoder->packet.type;
766
767 do {
768 decoder->pos += decoder->pkt_step;
769 decoder->buf += decoder->pkt_step;
770 decoder->len -= decoder->pkt_step;
771
772 if (!decoder->len) {
773 ret = intel_pt_get_next_data(decoder);
774 if (ret)
775 return ret;
776 }
777
778 ret = intel_pt_get_packet(decoder->buf, decoder->len,
779 &decoder->packet);
780 if (ret == INTEL_PT_NEED_MORE_BYTES &&
781 decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
782 ret = intel_pt_get_split_packet(decoder);
783 if (ret < 0)
784 return ret;
785 }
786 if (ret <= 0)
787 return intel_pt_bad_packet(decoder);
788
789 decoder->pkt_len = ret;
790 decoder->pkt_step = ret;
791 intel_pt_decoder_log_packet(decoder);
792 } while (decoder->packet.type == INTEL_PT_PAD);
793
794 return 0;
795}
796
797static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
798{
799 uint64_t timestamp, masked_timestamp;
800
801 timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
802 masked_timestamp = timestamp & decoder->period_mask;
803 if (decoder->continuous_period) {
804 if (masked_timestamp != decoder->last_masked_timestamp)
805 return 1;
806 } else {
807 timestamp += 1;
808 masked_timestamp = timestamp & decoder->period_mask;
809 if (masked_timestamp != decoder->last_masked_timestamp) {
810 decoder->last_masked_timestamp = masked_timestamp;
811 decoder->continuous_period = true;
812 }
813 }
814 return decoder->period_ticks - (timestamp - masked_timestamp);
815}
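/*
 * Editor's worked example (illustrative numbers): instructions walked since
 * the last timestamp stand in for elapsed ticks. With period_ticks = 0x1000
 * and period_mask = ~0xfffULL, a timestamp of 0x12345 masks to 0x12000 and
 * the function returns 0x1000 - 0x345 = 0xcbb, i.e. the instruction budget
 * remaining before the next period boundary.
 */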
816
817static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
818{
819 switch (decoder->period_type) {
820 case INTEL_PT_PERIOD_INSTRUCTIONS:
821 return decoder->period - decoder->period_insn_cnt;
822 case INTEL_PT_PERIOD_TICKS:
823 return intel_pt_next_period(decoder);
824 case INTEL_PT_PERIOD_NONE:
825 case INTEL_PT_PERIOD_MTC:
826 default:
827 return 0;
828 }
829}
830
831static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
832{
833 uint64_t timestamp, masked_timestamp;
834
835 switch (decoder->period_type) {
836 case INTEL_PT_PERIOD_INSTRUCTIONS:
837 decoder->period_insn_cnt = 0;
838 break;
839 case INTEL_PT_PERIOD_TICKS:
840 timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
841 masked_timestamp = timestamp & decoder->period_mask;
842 decoder->last_masked_timestamp = masked_timestamp;
843 break;
844 case INTEL_PT_PERIOD_NONE:
845 case INTEL_PT_PERIOD_MTC:
846 default:
847 break;
848 }
849
850 decoder->state.type |= INTEL_PT_INSTRUCTION;
851}
852
853static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
854 struct intel_pt_insn *intel_pt_insn, uint64_t ip)
855{
856 uint64_t max_insn_cnt, insn_cnt = 0;
857 int err;
858
859 if (!decoder->mtc_insn)
860 decoder->mtc_insn = true;
861
862 max_insn_cnt = intel_pt_next_sample(decoder);
863
864 err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
865 max_insn_cnt, decoder->data);
866
867 decoder->tot_insn_cnt += insn_cnt;
868 decoder->timestamp_insn_cnt += insn_cnt;
869 decoder->period_insn_cnt += insn_cnt;
870
871 if (err) {
872 decoder->no_progress = 0;
873 decoder->pkt_state = INTEL_PT_STATE_ERR2;
874 intel_pt_log_at("ERROR: Failed to get instruction",
875 decoder->ip);
876 if (err == -ENOENT)
877 return -ENOLINK;
878 return -EILSEQ;
879 }
880
881 if (ip && decoder->ip == ip) {
882 err = -EAGAIN;
883 goto out;
884 }
885
886 if (max_insn_cnt && insn_cnt >= max_insn_cnt)
887 intel_pt_sample_insn(decoder);
888
889 if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
890 decoder->state.type = INTEL_PT_INSTRUCTION;
891 decoder->state.from_ip = decoder->ip;
892 decoder->state.to_ip = 0;
893 decoder->ip += intel_pt_insn->length;
894 err = INTEL_PT_RETURN;
895 goto out;
896 }
897
898 if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
899 /* Zero-length calls are excluded */
900 if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
901 intel_pt_insn->rel) {
902 err = intel_pt_push(&decoder->stack, decoder->ip +
903 intel_pt_insn->length);
904 if (err)
905 goto out;
906 }
907 } else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
908 decoder->ret_addr = intel_pt_pop(&decoder->stack);
909 }
910
911 if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
912 int cnt = decoder->no_progress++;
913
914 decoder->state.from_ip = decoder->ip;
915 decoder->ip += intel_pt_insn->length +
916 intel_pt_insn->rel;
917 decoder->state.to_ip = decoder->ip;
918 err = INTEL_PT_RETURN;
919
920 /*
921 * Check for being stuck in a loop. This can happen if a
922 * decoder error results in the decoder erroneously setting the
923 * ip to an address that is itself in an infinite loop that
924 * consumes no packets. When that happens, there must be an
925 * unconditional branch.
926 */
927 if (cnt) {
928 if (cnt == 1) {
929 decoder->stuck_ip = decoder->state.to_ip;
930 decoder->stuck_ip_prd = 1;
931 decoder->stuck_ip_cnt = 1;
932 } else if (cnt > INTEL_PT_MAX_LOOPS ||
933 decoder->state.to_ip == decoder->stuck_ip) {
934 intel_pt_log_at("ERROR: Never-ending loop",
935 decoder->state.to_ip);
936 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
937 err = -ELOOP;
938 goto out;
939 } else if (!--decoder->stuck_ip_cnt) {
940 decoder->stuck_ip_prd += 1;
941 decoder->stuck_ip_cnt = decoder->stuck_ip_prd;
942 decoder->stuck_ip = decoder->state.to_ip;
943 }
944 }
945 goto out_no_progress;
946 }
947out:
948 decoder->no_progress = 0;
949out_no_progress:
950 decoder->state.insn_op = intel_pt_insn->op;
951 decoder->state.insn_len = intel_pt_insn->length;
952
953 if (decoder->tx_flags & INTEL_PT_IN_TX)
954 decoder->state.flags |= INTEL_PT_IN_TX;
955
956 return err;
957}
958
959static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
960{
961 struct intel_pt_insn intel_pt_insn;
962 uint64_t ip;
963 int err;
964
965 ip = decoder->last_ip;
966
967 while (1) {
968 err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
969 if (err == INTEL_PT_RETURN)
970 return 0;
971 if (err == -EAGAIN) {
972 if (decoder->set_fup_tx_flags) {
973 decoder->set_fup_tx_flags = false;
974 decoder->tx_flags = decoder->fup_tx_flags;
975 decoder->state.type = INTEL_PT_TRANSACTION;
976 decoder->state.from_ip = decoder->ip;
977 decoder->state.to_ip = 0;
978 decoder->state.flags = decoder->fup_tx_flags;
979 return 0;
980 }
981 return err;
982 }
983 decoder->set_fup_tx_flags = false;
984 if (err)
985 return err;
986
987 if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
988 intel_pt_log_at("ERROR: Unexpected indirect branch",
989 decoder->ip);
990 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
991 return -ENOENT;
992 }
993
994 if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
995 intel_pt_log_at("ERROR: Unexpected conditional branch",
996 decoder->ip);
997 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
998 return -ENOENT;
999 }
1000
1001 intel_pt_bug(decoder);
1002 }
1003}
1004
1005static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
1006{
1007 struct intel_pt_insn intel_pt_insn;
1008 int err;
1009
1010 err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
1011 if (err == INTEL_PT_RETURN)
1012 return 0;
1013 if (err)
1014 return err;
1015
1016 if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
1017 if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
1018 decoder->pge = false;
1019 decoder->continuous_period = false;
1020 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1021 decoder->state.from_ip = decoder->ip;
1022 decoder->state.to_ip = 0;
1023 if (decoder->packet.count != 0)
1024 decoder->ip = decoder->last_ip;
1025 } else {
1026 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1027 decoder->state.from_ip = decoder->ip;
1028 if (decoder->packet.count == 0) {
1029 decoder->state.to_ip = 0;
1030 } else {
1031 decoder->state.to_ip = decoder->last_ip;
1032 decoder->ip = decoder->last_ip;
1033 }
1034 }
1035 return 0;
1036 }
1037
1038 if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
1039 intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
1040 decoder->ip);
1041 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1042 return -ENOENT;
1043 }
1044
1045 return intel_pt_bug(decoder);
1046}
1047
1048static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
1049{
1050 struct intel_pt_insn intel_pt_insn;
1051 int err;
1052
1053 while (1) {
1054 err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
1055 if (err == INTEL_PT_RETURN)
1056 return 0;
1057 if (err)
1058 return err;
1059
1060 if (intel_pt_insn.op == INTEL_PT_OP_RET) {
1061 if (!decoder->return_compression) {
1062 intel_pt_log_at("ERROR: RET when expecting conditional branch",
1063 decoder->ip);
1064 decoder->pkt_state = INTEL_PT_STATE_ERR3;
1065 return -ENOENT;
1066 }
1067 if (!decoder->ret_addr) {
1068 intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
1069 decoder->ip);
1070 decoder->pkt_state = INTEL_PT_STATE_ERR3;
1071 return -ENOENT;
1072 }
1073 if (!(decoder->tnt.payload & BIT63)) {
1074 intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
1075 decoder->ip);
1076 decoder->pkt_state = INTEL_PT_STATE_ERR3;
1077 return -ENOENT;
1078 }
1079 decoder->tnt.count -= 1;
1080 if (!decoder->tnt.count)
1081 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1082 decoder->tnt.payload <<= 1;
1083 decoder->state.from_ip = decoder->ip;
1084 decoder->ip = decoder->ret_addr;
1085 decoder->state.to_ip = decoder->ip;
1086 return 0;
1087 }
1088
1089 if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
1090 /* Handle deferred TIPs */
1091 err = intel_pt_get_next_packet(decoder);
1092 if (err)
1093 return err;
1094 if (decoder->packet.type != INTEL_PT_TIP ||
1095 decoder->packet.count == 0) {
1096 intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
1097 decoder->ip);
1098 decoder->pkt_state = INTEL_PT_STATE_ERR3;
1099 decoder->pkt_step = 0;
1100 return -ENOENT;
1101 }
1102 intel_pt_set_last_ip(decoder);
1103 decoder->state.from_ip = decoder->ip;
1104 decoder->state.to_ip = decoder->last_ip;
1105 decoder->ip = decoder->last_ip;
1106 return 0;
1107 }
1108
1109 if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
1110 decoder->tnt.count -= 1;
1111 if (!decoder->tnt.count)
1112 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1113 if (decoder->tnt.payload & BIT63) {
1114 decoder->tnt.payload <<= 1;
1115 decoder->state.from_ip = decoder->ip;
1116 decoder->ip += intel_pt_insn.length +
1117 intel_pt_insn.rel;
1118 decoder->state.to_ip = decoder->ip;
1119 return 0;
1120 }
1121 /* Instruction sample for a non-taken branch */
1122 if (decoder->state.type & INTEL_PT_INSTRUCTION) {
1123 decoder->tnt.payload <<= 1;
1124 decoder->state.type = INTEL_PT_INSTRUCTION;
1125 decoder->state.from_ip = decoder->ip;
1126 decoder->state.to_ip = 0;
1127 decoder->ip += intel_pt_insn.length;
1128 return 0;
1129 }
1130 decoder->ip += intel_pt_insn.length;
1131 if (!decoder->tnt.count)
1132 return -EAGAIN;
1133 decoder->tnt.payload <<= 1;
1134 continue;
1135 }
1136
1137 return intel_pt_bug(decoder);
1138 }
1139}
1140
1141static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
1142{
1143 unsigned int fup_tx_flags;
1144 int err;
1145
1146 fup_tx_flags = decoder->packet.payload &
1147 (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
1148 err = intel_pt_get_next_packet(decoder);
1149 if (err)
1150 return err;
1151 if (decoder->packet.type == INTEL_PT_FUP) {
1152 decoder->fup_tx_flags = fup_tx_flags;
1153 decoder->set_fup_tx_flags = true;
1154 if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
1155 *no_tip = true;
1156 } else {
1157 intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
1158 decoder->pos);
1159 intel_pt_update_in_tx(decoder);
1160 }
1161 return 0;
1162}
1163
1164static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
1165{
1166 uint64_t timestamp;
1167
1168 decoder->have_tma = false;
1169
1170 if (decoder->ref_timestamp) {
1171 timestamp = decoder->packet.payload |
1172 (decoder->ref_timestamp & (0xffULL << 56));
1173 if (timestamp < decoder->ref_timestamp) {
1174 if (decoder->ref_timestamp - timestamp > (1ULL << 55))
1175 timestamp += (1ULL << 56);
1176 } else {
1177 if (timestamp - decoder->ref_timestamp > (1ULL << 55))
1178 timestamp -= (1ULL << 56);
1179 }
1180 decoder->tsc_timestamp = timestamp;
1181 decoder->timestamp = timestamp;
1182 decoder->ref_timestamp = 0;
1183 decoder->timestamp_insn_cnt = 0;
1184 } else if (decoder->timestamp) {
1185 timestamp = decoder->packet.payload |
1186 (decoder->timestamp & (0xffULL << 56));
1187 decoder->tsc_timestamp = timestamp;
1188 if (timestamp < decoder->timestamp &&
1189 decoder->timestamp - timestamp < decoder->tsc_slip) {
1190 intel_pt_log_to("Suppressing backwards timestamp",
1191 timestamp);
1192 timestamp = decoder->timestamp;
1193 }
1194 while (timestamp < decoder->timestamp) {
1195 intel_pt_log_to("Wraparound timestamp", timestamp);
1196 timestamp += (1ULL << 56);
1197 decoder->tsc_timestamp = timestamp;
1198 }
1199 decoder->timestamp = timestamp;
1200 decoder->timestamp_insn_cnt = 0;
1201 }
1202
1203 if (decoder->last_packet_type == INTEL_PT_CYC) {
1204 decoder->cyc_ref_timestamp = decoder->timestamp;
1205 decoder->cycle_cnt = 0;
1206 decoder->have_calc_cyc_to_tsc = false;
1207 intel_pt_calc_cyc_to_tsc(decoder, false);
1208 }
1209
1210 intel_pt_log_to("Setting timestamp", decoder->timestamp);
1211}
1212
1213static int intel_pt_overflow(struct intel_pt_decoder *decoder)
1214{
1215 intel_pt_log("ERROR: Buffer overflow\n");
1216 intel_pt_clear_tx_flags(decoder);
1217 decoder->have_tma = false;
1218 decoder->cbr = 0;
1219 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1220 decoder->overflow = true;
1221 return -EOVERFLOW;
1222}
1223
1224static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
1225{
1226 uint32_t ctc = decoder->packet.payload;
1227 uint32_t fc = decoder->packet.count;
1228 uint32_t ctc_rem = ctc & decoder->ctc_rem_mask;
1229
1230 if (!decoder->tsc_ctc_ratio_d)
1231 return;
1232
1233 decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
1234 decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
1235 if (decoder->tsc_ctc_mult) {
1236 decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
1237 } else {
1238 decoder->ctc_timestamp -= multdiv(ctc_rem,
1239 decoder->tsc_ctc_ratio_n,
1240 decoder->tsc_ctc_ratio_d);
1241 }
1242 decoder->ctc_delta = 0;
1243 decoder->have_tma = true;
1244 intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n",
1245 decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
1246}
1247
1248static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
1249{
1250 uint64_t timestamp;
1251 uint32_t mtc, mtc_delta;
1252
1253 if (!decoder->have_tma)
1254 return;
1255
1256 mtc = decoder->packet.payload;
1257
1258 if (mtc > decoder->last_mtc)
1259 mtc_delta = mtc - decoder->last_mtc;
1260 else
1261 mtc_delta = mtc + 256 - decoder->last_mtc;
1262
1263 decoder->ctc_delta += mtc_delta << decoder->mtc_shift;
1264
1265 if (decoder->tsc_ctc_mult) {
1266 timestamp = decoder->ctc_timestamp +
1267 decoder->ctc_delta * decoder->tsc_ctc_mult;
1268 } else {
1269 timestamp = decoder->ctc_timestamp +
1270 multdiv(decoder->ctc_delta,
1271 decoder->tsc_ctc_ratio_n,
1272 decoder->tsc_ctc_ratio_d);
1273 }
1274
1275 if (timestamp < decoder->timestamp)
1276 intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
1277 timestamp, decoder->timestamp);
1278 else
1279 decoder->timestamp = timestamp;
1280
1281 decoder->timestamp_insn_cnt = 0;
1282 decoder->last_mtc = mtc;
1283
1284 if (decoder->last_packet_type == INTEL_PT_CYC) {
1285 decoder->cyc_ref_timestamp = decoder->timestamp;
1286 decoder->cycle_cnt = 0;
1287 decoder->have_calc_cyc_to_tsc = false;
1288 intel_pt_calc_cyc_to_tsc(decoder, true);
1289 }
1290}
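/*
 * Editor's worked example (illustrative numbers): MTC payloads are 8 bits, so
 * going from last_mtc = 0xfe to mtc = 0x01 takes the "+ 256" branch and gives
 * mtc_delta = 3. With mtc_shift = 3 that adds 3 << 3 = 24 CTC units to
 * ctc_delta, and with a non-integral TSC:CTC ratio of, say, 10/3 the new
 * timestamp is ctc_timestamp + multdiv(ctc_delta, 10, 3); an integral ratio
 * uses the precomputed tsc_ctc_mult instead.
 */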
1291
1292static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
1293{
1294 unsigned int cbr = decoder->packet.payload;
1295
1296 if (decoder->cbr == cbr)
1297 return;
1298
1299 decoder->cbr = cbr;
1300 decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
1301}
1302
1303static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
1304{
1305 uint64_t timestamp = decoder->cyc_ref_timestamp;
1306
1307 decoder->have_cyc = true;
1308
1309 decoder->cycle_cnt += decoder->packet.payload;
1310
1311 if (!decoder->cyc_ref_timestamp)
1312 return;
1313
1314 if (decoder->have_calc_cyc_to_tsc)
1315 timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
1316 else if (decoder->cbr)
1317 timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
1318 else
1319 return;
1320
1321 if (timestamp < decoder->timestamp)
1322 intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
1323 timestamp, decoder->timestamp);
1324 else
1325 decoder->timestamp = timestamp;
1326}
1327
1328/* Walk PSB+ packets when already in sync. */
1329static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
1330{
1331 int err;
1332
1333 while (1) {
1334 err = intel_pt_get_next_packet(decoder);
1335 if (err)
1336 return err;
1337
1338 switch (decoder->packet.type) {
1339 case INTEL_PT_PSBEND:
1340 return 0;
1341
1342 case INTEL_PT_TIP_PGD:
1343 case INTEL_PT_TIP_PGE:
1344 case INTEL_PT_TIP:
1345 case INTEL_PT_TNT:
1346 case INTEL_PT_TRACESTOP:
1347 case INTEL_PT_BAD:
1348 case INTEL_PT_PSB:
1349 decoder->have_tma = false;
1350 intel_pt_log("ERROR: Unexpected packet\n");
1351 return -EAGAIN;
1352
1353 case INTEL_PT_OVF:
1354 return intel_pt_overflow(decoder);
1355
1356 case INTEL_PT_TSC:
1357 intel_pt_calc_tsc_timestamp(decoder);
1358 break;
1359
1360 case INTEL_PT_TMA:
1361 intel_pt_calc_tma(decoder);
1362 break;
1363
1364 case INTEL_PT_CBR:
1365 intel_pt_calc_cbr(decoder);
1366 break;
1367
1368 case INTEL_PT_MODE_EXEC:
1369 decoder->exec_mode = decoder->packet.payload;
1370 break;
1371
1372 case INTEL_PT_PIP:
1373 decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
1374 break;
1375
1376 case INTEL_PT_FUP:
1377 decoder->pge = true;
1378 intel_pt_set_last_ip(decoder);
1379 break;
1380
1381 case INTEL_PT_MODE_TSX:
1382 intel_pt_update_in_tx(decoder);
1383 break;
1384
1385 case INTEL_PT_MTC:
1386 intel_pt_calc_mtc_timestamp(decoder);
1387 if (decoder->period_type == INTEL_PT_PERIOD_MTC)
1388 decoder->state.type |= INTEL_PT_INSTRUCTION;
1389 break;
1390
1391 case INTEL_PT_CYC:
1392 case INTEL_PT_VMCS:
1393 case INTEL_PT_MNT:
1394 case INTEL_PT_PAD:
1395 default:
1396 break;
1397 }
1398 }
1399}
1400
1401static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
1402{
1403 int err;
1404
1405 if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
1406 decoder->tx_flags = 0;
1407 decoder->state.flags &= ~INTEL_PT_IN_TX;
1408 decoder->state.flags |= INTEL_PT_ABORT_TX;
1409 } else {
1410 decoder->state.flags |= INTEL_PT_ASYNC;
1411 }
1412
1413 while (1) {
1414 err = intel_pt_get_next_packet(decoder);
1415 if (err)
1416 return err;
1417
1418 switch (decoder->packet.type) {
1419 case INTEL_PT_TNT:
1420 case INTEL_PT_FUP:
1421 case INTEL_PT_TRACESTOP:
1422 case INTEL_PT_PSB:
1423 case INTEL_PT_TSC:
1424 case INTEL_PT_TMA:
1425 case INTEL_PT_CBR:
1426 case INTEL_PT_MODE_TSX:
1427 case INTEL_PT_BAD:
1428 case INTEL_PT_PSBEND:
1429 intel_pt_log("ERROR: Missing TIP after FUP\n");
1430 decoder->pkt_state = INTEL_PT_STATE_ERR3;
1431 return -ENOENT;
1432
1433 case INTEL_PT_OVF:
1434 return intel_pt_overflow(decoder);
1435
1436 case INTEL_PT_TIP_PGD:
1437 decoder->state.from_ip = decoder->ip;
1438 decoder->state.to_ip = 0;
1439 if (decoder->packet.count != 0) {
1440 intel_pt_set_ip(decoder);
1441 intel_pt_log("Omitting PGD ip " x64_fmt "\n",
1442 decoder->ip);
1443 }
1444 decoder->pge = false;
1445 decoder->continuous_period = false;
1446 return 0;
1447
1448 case INTEL_PT_TIP_PGE:
1449 decoder->pge = true;
1450 intel_pt_log("Omitting PGE ip " x64_fmt "\n",
1451 decoder->ip);
1452 decoder->state.from_ip = 0;
1453 if (decoder->packet.count == 0) {
1454 decoder->state.to_ip = 0;
1455 } else {
1456 intel_pt_set_ip(decoder);
1457 decoder->state.to_ip = decoder->ip;
1458 }
1459 return 0;
1460
1461 case INTEL_PT_TIP:
1462 decoder->state.from_ip = decoder->ip;
1463 if (decoder->packet.count == 0) {
1464 decoder->state.to_ip = 0;
1465 } else {
1466 intel_pt_set_ip(decoder);
1467 decoder->state.to_ip = decoder->ip;
1468 }
1469 return 0;
1470
1471 case INTEL_PT_PIP:
1472 decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
1473 break;
1474
1475 case INTEL_PT_MTC:
1476 intel_pt_calc_mtc_timestamp(decoder);
1477 if (decoder->period_type == INTEL_PT_PERIOD_MTC)
1478 decoder->state.type |= INTEL_PT_INSTRUCTION;
1479 break;
1480
1481 case INTEL_PT_CYC:
1482 intel_pt_calc_cyc_timestamp(decoder);
1483 break;
1484
1485 case INTEL_PT_MODE_EXEC:
1486 decoder->exec_mode = decoder->packet.payload;
1487 break;
1488
1489 case INTEL_PT_VMCS:
1490 case INTEL_PT_MNT:
1491 case INTEL_PT_PAD:
1492 break;
1493
1494 default:
1495 return intel_pt_bug(decoder);
1496 }
1497 }
1498}
1499
1500static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
1501{
1502 bool no_tip = false;
1503 int err;
1504
1505 while (1) {
1506 err = intel_pt_get_next_packet(decoder);
1507 if (err)
1508 return err;
1509next:
1510 switch (decoder->packet.type) {
1511 case INTEL_PT_TNT:
1512 if (!decoder->packet.count)
1513 break;
1514 decoder->tnt = decoder->packet;
1515 decoder->pkt_state = INTEL_PT_STATE_TNT;
1516 err = intel_pt_walk_tnt(decoder);
1517 if (err == -EAGAIN)
1518 break;
1519 return err;
1520
1521 case INTEL_PT_TIP_PGD:
1522 if (decoder->packet.count != 0)
1523 intel_pt_set_last_ip(decoder);
1524 decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
1525 return intel_pt_walk_tip(decoder);
1526
1527 case INTEL_PT_TIP_PGE: {
1528 decoder->pge = true;
1529 if (decoder->packet.count == 0) {
1530 intel_pt_log_at("Skipping zero TIP.PGE",
1531 decoder->pos);
1532 break;
1533 }
1534 intel_pt_set_ip(decoder);
1535 decoder->state.from_ip = 0;
1536 decoder->state.to_ip = decoder->ip;
1537 return 0;
1538 }
1539
1540 case INTEL_PT_OVF:
1541 return intel_pt_overflow(decoder);
1542
1543 case INTEL_PT_TIP:
1544 if (decoder->packet.count != 0)
1545 intel_pt_set_last_ip(decoder);
1546 decoder->pkt_state = INTEL_PT_STATE_TIP;
1547 return intel_pt_walk_tip(decoder);
1548
1549 case INTEL_PT_FUP:
1550 if (decoder->packet.count == 0) {
1551 intel_pt_log_at("Skipping zero FUP",
1552 decoder->pos);
1553 no_tip = false;
1554 break;
1555 }
1556 intel_pt_set_last_ip(decoder);
1557 err = intel_pt_walk_fup(decoder);
1558 if (err != -EAGAIN) {
1559 if (err)
1560 return err;
1561 if (no_tip)
1562 decoder->pkt_state =
1563 INTEL_PT_STATE_FUP_NO_TIP;
1564 else
1565 decoder->pkt_state = INTEL_PT_STATE_FUP;
1566 return 0;
1567 }
1568 if (no_tip) {
1569 no_tip = false;
1570 break;
1571 }
1572 return intel_pt_walk_fup_tip(decoder);
1573
1574 case INTEL_PT_TRACESTOP:
1575 decoder->pge = false;
1576 decoder->continuous_period = false;
1577 intel_pt_clear_tx_flags(decoder);
1578 decoder->have_tma = false;
1579 break;
1580
1581 case INTEL_PT_PSB:
1582 intel_pt_clear_stack(&decoder->stack);
1583 err = intel_pt_walk_psbend(decoder);
1584 if (err == -EAGAIN)
1585 goto next;
1586 if (err)
1587 return err;
1588 break;
1589
1590 case INTEL_PT_PIP:
1591 decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
1592 break;
1593
1594 case INTEL_PT_MTC:
1595 intel_pt_calc_mtc_timestamp(decoder);
1596 if (decoder->period_type != INTEL_PT_PERIOD_MTC)
1597 break;
1598 /*
1599 * Ensure that there has been an instruction since the
1600 * last MTC.
1601 */
1602 if (!decoder->mtc_insn)
1603 break;
1604 decoder->mtc_insn = false;
1605 /* Ensure that there is a timestamp */
1606 if (!decoder->timestamp)
1607 break;
1608 decoder->state.type = INTEL_PT_INSTRUCTION;
1609 decoder->state.from_ip = decoder->ip;
1610 decoder->state.to_ip = 0;
1611 decoder->mtc_insn = false;
1612 return 0;
1613
1614 case INTEL_PT_TSC:
1615 intel_pt_calc_tsc_timestamp(decoder);
1616 break;
1617
1618 case INTEL_PT_TMA:
1619 intel_pt_calc_tma(decoder);
1620 break;
1621
1622 case INTEL_PT_CYC:
1623 intel_pt_calc_cyc_timestamp(decoder);
1624 break;
1625
1626 case INTEL_PT_CBR:
1627 intel_pt_calc_cbr(decoder);
1628 break;
1629
1630 case INTEL_PT_MODE_EXEC:
1631 decoder->exec_mode = decoder->packet.payload;
1632 break;
1633
1634 case INTEL_PT_MODE_TSX:
1635 /* MODE_TSX need not be followed by FUP */
1636 if (!decoder->pge) {
1637 intel_pt_update_in_tx(decoder);
1638 break;
1639 }
1640 err = intel_pt_mode_tsx(decoder, &no_tip);
1641 if (err)
1642 return err;
1643 goto next;
1644
1645 case INTEL_PT_BAD: /* Does not happen */
1646 return intel_pt_bug(decoder);
1647
1648 case INTEL_PT_PSBEND:
1649 case INTEL_PT_VMCS:
1650 case INTEL_PT_MNT:
1651 case INTEL_PT_PAD:
1652 break;
1653
1654 default:
1655 return intel_pt_bug(decoder);
1656 }
1657 }
1658}
1659
1660/* Walk PSB+ packets to get in sync. */
1661static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
1662{
1663 int err;
1664
1665 while (1) {
1666 err = intel_pt_get_next_packet(decoder);
1667 if (err)
1668 return err;
1669
1670 switch (decoder->packet.type) {
1671 case INTEL_PT_TIP_PGD:
1672 decoder->continuous_period = false;
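 /* Fall through */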
1673 case INTEL_PT_TIP_PGE:
1674 case INTEL_PT_TIP:
1675 intel_pt_log("ERROR: Unexpected packet\n");
1676 return -ENOENT;
1677
1678 case INTEL_PT_FUP:
1679 decoder->pge = true;
1680 if (decoder->last_ip || decoder->packet.count == 6 ||
1681 decoder->packet.count == 0) {
1682 uint64_t current_ip = decoder->ip;
1683
1684 intel_pt_set_ip(decoder);
1685 if (current_ip)
1686 intel_pt_log_to("Setting IP",
1687 decoder->ip);
1688 }
1689 break;
1690
1691 case INTEL_PT_MTC:
1692 intel_pt_calc_mtc_timestamp(decoder);
1693 break;
1694
1695 case INTEL_PT_TSC:
1696 intel_pt_calc_tsc_timestamp(decoder);
1697 break;
1698
1699 case INTEL_PT_TMA:
1700 intel_pt_calc_tma(decoder);
1701 break;
1702
1703 case INTEL_PT_CYC:
1704 intel_pt_calc_cyc_timestamp(decoder);
1705 break;
1706
1707 case INTEL_PT_CBR:
1708 intel_pt_calc_cbr(decoder);
1709 break;
1710
1711 case INTEL_PT_PIP:
1712 decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
1713 break;
1714
1715 case INTEL_PT_MODE_EXEC:
1716 decoder->exec_mode = decoder->packet.payload;
1717 break;
1718
1719 case INTEL_PT_MODE_TSX:
1720 intel_pt_update_in_tx(decoder);
1721 break;
1722
1723 case INTEL_PT_TRACESTOP:
1724 decoder->pge = false;
1725 decoder->continuous_period = false;
1726 intel_pt_clear_tx_flags(decoder);
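 /* Fall through */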
1727 case INTEL_PT_TNT:
1728 decoder->have_tma = false;
1729 intel_pt_log("ERROR: Unexpected packet\n");
1730 if (decoder->ip)
1731 decoder->pkt_state = INTEL_PT_STATE_ERR4;
1732 else
1733 decoder->pkt_state = INTEL_PT_STATE_ERR3;
1734 return -ENOENT;
1735
1736 case INTEL_PT_BAD: /* Does not happen */
1737 return intel_pt_bug(decoder);
1738
1739 case INTEL_PT_OVF:
1740 return intel_pt_overflow(decoder);
1741
1742 case INTEL_PT_PSBEND:
1743 return 0;
1744
1745 case INTEL_PT_PSB:
1746 case INTEL_PT_VMCS:
1747 case INTEL_PT_MNT:
1748 case INTEL_PT_PAD:
1749 default:
1750 break;
1751 }
1752 }
1753}
1754
1755static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
1756{
1757 int err;
1758
1759 while (1) {
1760 err = intel_pt_get_next_packet(decoder);
1761 if (err)
1762 return err;
1763
1764 switch (decoder->packet.type) {
1765 case INTEL_PT_TIP_PGD:
1766 decoder->continuous_period = false;
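 /* Fall through */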
1767 case INTEL_PT_TIP_PGE:
1768 case INTEL_PT_TIP:
1769 decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
1770 if (decoder->last_ip || decoder->packet.count == 6 ||
1771 decoder->packet.count == 0)
1772 intel_pt_set_ip(decoder);
1773 if (decoder->ip)
1774 return 0;
1775 break;
1776
1777 case INTEL_PT_FUP:
1778 if (decoder->overflow) {
1779 if (decoder->last_ip ||
1780 decoder->packet.count == 6 ||
1781 decoder->packet.count == 0)
1782 intel_pt_set_ip(decoder);
1783 if (decoder->ip)
1784 return 0;
1785 }
1786 if (decoder->packet.count)
1787 intel_pt_set_last_ip(decoder);
1788 break;
1789
1790 case INTEL_PT_MTC:
1791 intel_pt_calc_mtc_timestamp(decoder);
1792 break;
1793
1794 case INTEL_PT_TSC:
1795 intel_pt_calc_tsc_timestamp(decoder);
1796 break;
1797
1798 case INTEL_PT_TMA:
1799 intel_pt_calc_tma(decoder);
1800 break;
1801
1802 case INTEL_PT_CYC:
1803 intel_pt_calc_cyc_timestamp(decoder);
1804 break;
1805
1806 case INTEL_PT_CBR:
1807 intel_pt_calc_cbr(decoder);
1808 break;
1809
1810 case INTEL_PT_PIP:
1811 decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
1812 break;
1813
1814 case INTEL_PT_MODE_EXEC:
1815 decoder->exec_mode = decoder->packet.payload;
1816 break;
1817
1818 case INTEL_PT_MODE_TSX:
1819 intel_pt_update_in_tx(decoder);
1820 break;
1821
1822 case INTEL_PT_OVF:
1823 return intel_pt_overflow(decoder);
1824
1825 case INTEL_PT_BAD: /* Does not happen */
1826 return intel_pt_bug(decoder);
1827
1828 case INTEL_PT_TRACESTOP:
1829 decoder->pge = false;
1830 decoder->continuous_period = false;
1831 intel_pt_clear_tx_flags(decoder);
1832 decoder->have_tma = false;
1833 break;
1834
1835 case INTEL_PT_PSB:
1836 err = intel_pt_walk_psb(decoder);
1837 if (err)
1838 return err;
1839 if (decoder->ip) {
1840 /* Do not have a sample */
1841 decoder->state.type = 0;
1842 return 0;
1843 }
1844 break;
1845
1846 case INTEL_PT_TNT:
1847 case INTEL_PT_PSBEND:
1848 case INTEL_PT_VMCS:
1849 case INTEL_PT_MNT:
1850 case INTEL_PT_PAD:
1851 default:
1852 break;
1853 }
1854 }
1855}
1856
1857static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
1858{
1859 int err;
1860
1861 intel_pt_log("Scanning for full IP\n");
1862 err = intel_pt_walk_to_ip(decoder);
1863 if (err)
1864 return err;
1865
1866 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1867 decoder->overflow = false;
1868
1869 decoder->state.from_ip = 0;
1870 decoder->state.to_ip = decoder->ip;
1871 intel_pt_log_to("Setting IP", decoder->ip);
1872
1873 return 0;
1874}
1875
1876static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
1877{
1878 const unsigned char *end = decoder->buf + decoder->len;
1879 size_t i;
1880
1881 for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
1882 if (i > decoder->len)
1883 continue;
1884 if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
1885 return i;
1886 }
1887 return 0;
1888}
1889
1890static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
1891{
1892 size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
1893 const char *psb = INTEL_PT_PSB_STR;
1894
1895 if (rest_psb > decoder->len ||
1896 memcmp(decoder->buf, psb + part_psb, rest_psb))
1897 return 0;
1898
1899 return rest_psb;
1900}
1901
1902static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
1903 int part_psb)
1904{
1905 int rest_psb, ret;
1906
1907 decoder->pos += decoder->len;
1908 decoder->len = 0;
1909
1910 ret = intel_pt_get_next_data(decoder);
1911 if (ret)
1912 return ret;
1913
1914 rest_psb = intel_pt_rest_psb(decoder, part_psb);
1915 if (!rest_psb)
1916 return 0;
1917
1918 decoder->pos -= part_psb;
1919 decoder->next_buf = decoder->buf + rest_psb;
1920 decoder->next_len = decoder->len - rest_psb;
1921 memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
1922 decoder->buf = decoder->temp_buf;
1923 decoder->len = INTEL_PT_PSB_LEN;
1924
1925 return 0;
1926}
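/*
 * Editor's note with example numbers: a PSB is a fixed 16-byte pattern
 * (INTEL_PT_PSB_LEN) and can straddle two trace buffers. If the current
 * buffer ends with the first 5 PSB bytes, intel_pt_part_psb() returns 5;
 * after the next buffer is fetched, intel_pt_rest_psb() verifies it begins
 * with the remaining 11 bytes, and the full PSB is reassembled in temp_buf
 * so the scan can proceed as if the trace were contiguous.
 */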
1927
1928static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
1929{
1930 unsigned char *next;
1931 int ret;
1932
1933 intel_pt_log("Scanning for PSB\n");
1934 while (1) {
1935 if (!decoder->len) {
1936 ret = intel_pt_get_next_data(decoder);
1937 if (ret)
1938 return ret;
1939 }
1940
1941 next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
1942 INTEL_PT_PSB_LEN);
1943 if (!next) {
1944 int part_psb;
1945
1946 part_psb = intel_pt_part_psb(decoder);
1947 if (part_psb) {
1948 ret = intel_pt_get_split_psb(decoder, part_psb);
1949 if (ret)
1950 return ret;
1951 } else {
1952 decoder->pos += decoder->len;
1953 decoder->len = 0;
1954 }
1955 continue;
1956 }
1957
1958 decoder->pkt_step = next - decoder->buf;
1959 return intel_pt_get_next_packet(decoder);
1960 }
1961}
1962
1963static int intel_pt_sync(struct intel_pt_decoder *decoder)
1964{
1965 int err;
1966
1967 decoder->pge = false;
1968 decoder->continuous_period = false;
1969 decoder->last_ip = 0;
1970 decoder->ip = 0;
1971 intel_pt_clear_stack(&decoder->stack);
1972
1973 err = intel_pt_scan_for_psb(decoder);
1974 if (err)
1975 return err;
1976
1977 decoder->pkt_state = INTEL_PT_STATE_NO_IP;
1978
1979 err = intel_pt_walk_psb(decoder);
1980 if (err)
1981 return err;
1982
1983 if (decoder->ip) {
1984 decoder->state.type = 0; /* Do not have a sample */
1985 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1986 } else {
1987 return intel_pt_sync_ip(decoder);
1988 }
1989
1990 return 0;
1991}
1992
1993static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
1994{
1995 uint64_t est = decoder->timestamp_insn_cnt << 1;
1996
1997 if (!decoder->cbr || !decoder->max_non_turbo_ratio)
1998 goto out;
1999
2000 est *= decoder->max_non_turbo_ratio;
2001 est /= decoder->cbr;
2002out:
2003 return decoder->timestamp + est;
2004}
2005
2006const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2007{
2008 int err;
2009
2010 do {
2011 decoder->state.type = INTEL_PT_BRANCH;
2012 decoder->state.flags = 0;
2013
2014 switch (decoder->pkt_state) {
2015 case INTEL_PT_STATE_NO_PSB:
2016 err = intel_pt_sync(decoder);
2017 break;
2018 case INTEL_PT_STATE_NO_IP:
2019 decoder->last_ip = 0;
2020 /* Fall through */
2021 case INTEL_PT_STATE_ERR_RESYNC:
2022 err = intel_pt_sync_ip(decoder);
2023 break;
2024 case INTEL_PT_STATE_IN_SYNC:
2025 err = intel_pt_walk_trace(decoder);
2026 break;
2027 case INTEL_PT_STATE_TNT:
2028 err = intel_pt_walk_tnt(decoder);
2029 if (err == -EAGAIN)
2030 err = intel_pt_walk_trace(decoder);
2031 break;
2032 case INTEL_PT_STATE_TIP:
2033 case INTEL_PT_STATE_TIP_PGD:
2034 err = intel_pt_walk_tip(decoder);
2035 break;
2036 case INTEL_PT_STATE_FUP:
2037 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2038 err = intel_pt_walk_fup(decoder);
2039 if (err == -EAGAIN)
2040 err = intel_pt_walk_fup_tip(decoder);
2041 else if (!err)
2042 decoder->pkt_state = INTEL_PT_STATE_FUP;
2043 break;
2044 case INTEL_PT_STATE_FUP_NO_TIP:
2045 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2046 err = intel_pt_walk_fup(decoder);
2047 if (err == -EAGAIN)
2048 err = intel_pt_walk_trace(decoder);
2049 break;
2050 default:
2051 err = intel_pt_bug(decoder);
2052 break;
2053 }
2054 } while (err == -ENOLINK);
2055
2056 decoder->state.err = err ? intel_pt_ext_err(err) : 0;
2057 decoder->state.timestamp = decoder->timestamp;
2058 decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
2059 decoder->state.cr3 = decoder->cr3;
2060 decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
2061
2062 if (err)
2063 decoder->state.from_ip = decoder->ip;
2064
2065 return &decoder->state;
2066}
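/*
 * Editor's usage sketch for the public entry point above: intel_pt_decode()
 * always returns a state; errors surface through state->err rather than a
 * NULL return. A hypothetical consumer loop (emit_branch() is assumed):
 *
 *	const struct intel_pt_state *state;
 *
 *	while (1) {
 *		state = intel_pt_decode(decoder);
 *		if (state->err)
 *			break;	// e.g. INTEL_PT_ERR_NODATA: end of trace
 *		if (state->type & INTEL_PT_BRANCH)
 *			emit_branch(state->from_ip, state->to_ip);
 *	}
 */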
2067
2068static bool intel_pt_at_psb(unsigned char *buf, size_t len)
2069{
2070 if (len < INTEL_PT_PSB_LEN)
2071 return false;
2072 return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR,
2073 INTEL_PT_PSB_LEN);
2074}
2075
2076/**
2077 * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
2078 * @buf: pointer to buffer pointer
2079 * @len: size of buffer
2080 *
2081 * Updates the buffer pointer to point to the start of the next PSB packet if
2082 * there is one, otherwise the buffer pointer is unchanged. If @buf is updated,
2083 * @len is adjusted accordingly.
2084 *
2085 * Return: %true if a PSB packet is found, %false otherwise.
2086 */
2087static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
2088{
2089 unsigned char *next;
2090
2091 next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
2092 if (next) {
2093 *len -= next - *buf;
2094 *buf = next;
2095 return true;
2096 }
2097 return false;
2098}
2099
2100/**
2101 * intel_pt_step_psb - move buffer pointer to the start of the following PSB
2102 * packet.
2103 * @buf: pointer to buffer pointer
2104 * @len: size of buffer
2105 *
2106 * Updates the buffer pointer to point to the start of the following PSB packet
2107 * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
2108 * pointer is unchanged. If @buf is updated, @len is adjusted accordingly.
2109 *
2110 * Return: %true if a PSB packet is found, %false otherwise.
2111 */
2112static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
2113{
2114 unsigned char *next;
2115
2116 if (!*len)
2117 return false;
2118
2119 next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
2120 if (next) {
2121 *len -= next - *buf;
2122 *buf = next;
2123 return true;
2124 }
2125 return false;
2126}
2127
2128/**
2129 * intel_pt_last_psb - find the last PSB packet in a buffer.
2130 * @buf: buffer
2131 * @len: size of buffer
2132 *
2133 * This function finds the last PSB in a buffer.
2134 *
2135 * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
2136 */
2137static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
2138{
2139 const char *n = INTEL_PT_PSB_STR;
2140 unsigned char *p;
2141 size_t k;
2142
2143 if (len < INTEL_PT_PSB_LEN)
2144 return NULL;
2145
2146 k = len - INTEL_PT_PSB_LEN + 1;
2147 while (1) {
2148 p = memrchr(buf, n[0], k);
2149 if (!p)
2150 return NULL;
2151 if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
2152 return p;
2153 k = p - buf;
2154 if (!k)
2155 return NULL;
2156 }
2157}
2158
2159/**
2160 * intel_pt_next_tsc - find and return next TSC.
2161 * @buf: buffer
2162 * @len: size of buffer
2163 * @tsc: TSC value returned
2164 *
2165 * Find a TSC packet in @buf and return the TSC value. This function assumes
2166 * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
2167 * PSBEND packet is found.
2168 *
2169 * Return: %true if TSC is found, %false otherwise.
2170 */
2171static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
2172{
2173 struct intel_pt_pkt packet;
2174 int ret;
2175
2176 while (len) {
2177 ret = intel_pt_get_packet(buf, len, &packet);
2178 if (ret <= 0)
2179 return false;
2180 if (packet.type == INTEL_PT_TSC) {
2181 *tsc = packet.payload;
2182 return true;
2183 }
2184 if (packet.type == INTEL_PT_PSBEND)
2185 return false;
2186 buf += ret;
2187 len -= ret;
2188 }
2189 return false;
2190}
2191
2192/**
2193 * intel_pt_tsc_cmp - compare 7-byte TSCs.
2194 * @tsc1: first TSC to compare
2195 * @tsc2: second TSC to compare
2196 *
2197 * This function compares 7-byte TSC values, allowing for the possibility that
2198 * TSC wrapped around. Generally it is not possible to know whether TSC has
2199 * wrapped around, so this function assumes the absolute difference is less
2200 * than half the maximum possible difference.
2201 *
2202 * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
2203 * after @tsc2.
2204 */
2205static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
2206{
2207 const uint64_t halfway = (1ULL << 55);
2208
2209 if (tsc1 == tsc2)
2210 return 0;
2211
2212 if (tsc1 < tsc2) {
2213 if (tsc2 - tsc1 < halfway)
2214 return -1;
2215 else
2216 return 1;
2217 } else {
2218 if (tsc1 - tsc2 < halfway)
2219 return 1;
2220 else
2221 return -1;
2222 }
2223}
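/*
 * Editor's worked example: TSC payloads are 7 bytes, so values wrap at 2^56.
 * Comparing tsc1 = 0xffffffffffffff (the maximum value) with tsc2 = 0x0:
 * tsc1 - tsc2 >= 2^55, so the difference is read as wraparound and the
 * function returns -1, i.e. tsc1 is treated as being just before tsc2.
 */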
2224
2225/**
2226 * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
2227 * using TSC.
2228 * @buf_a: first buffer
2229 * @len_a: size of first buffer
2230 * @buf_b: second buffer
2231 * @len_b: size of second buffer
2232 *
2233 * If the trace contains TSC we can look at the last TSC of @buf_a and the
2234 * first TSC of @buf_b in order to determine if the buffers overlap, and then
2235 * walk forward in @buf_b until a later TSC is found. A precondition is that
2236 * @buf_a and @buf_b are positioned at a PSB.
2237 *
2238 * Return: A pointer into @buf_b from where non-overlapped data starts, or
2239 * @buf_b + @len_b if there is no non-overlapped data.
2240 */
2241static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
2242 size_t len_a,
2243 unsigned char *buf_b,
2244 size_t len_b)
2245{
2246 uint64_t tsc_a, tsc_b;
2247 unsigned char *p;
2248 size_t len;
2249
2250 p = intel_pt_last_psb(buf_a, len_a);
2251 if (!p)
2252 return buf_b; /* No PSB in buf_a => no overlap */
2253
2254 len = len_a - (p - buf_a);
2255 if (!intel_pt_next_tsc(p, len, &tsc_a)) {
2256 /* The last PSB+ in buf_a is incomplete, so go back one more */
2257 len_a -= len;
2258 p = intel_pt_last_psb(buf_a, len_a);
2259 if (!p)
2260 return buf_b; /* No full PSB+ => assume no overlap */
2261 len = len_a - (p - buf_a);
2262 if (!intel_pt_next_tsc(p, len, &tsc_a))
2263 return buf_b; /* No TSC in buf_a => assume no overlap */
2264 }
2265
2266 while (1) {
2267 /* Ignore PSB+ with no TSC */
2268 if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
2269 intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
2270 return buf_b; /* tsc_a < tsc_b => no overlap */
2271
2272 if (!intel_pt_step_psb(&buf_b, &len_b))
2273 return buf_b + len_b; /* No PSB in buf_b => no data */
2274 }
2275}
2276
2277/**
2278 * intel_pt_find_overlap - determine start of non-overlapped trace data.
2279 * @buf_a: first buffer
2280 * @len_a: size of first buffer
2281 * @buf_b: second buffer
2282 * @len_b: size of second buffer
2283 * @have_tsc: can use TSC packets to detect overlap
2284 *
2285 * When trace samples or snapshots are recorded there is the possibility that
2286 * the data overlaps. Note that, for the purposes of decoding, data is only
2287 * useful if it begins with a PSB packet.
2288 *
2289 * Return: A pointer into @buf_b from where non-overlapped data starts, or
2290 * @buf_b + @len_b if there is no non-overlapped data.
2291 */
2292unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
2293 unsigned char *buf_b, size_t len_b,
2294 bool have_tsc)
2295{
2296 unsigned char *found;
2297
2298 /* Buffer 'b' must start at PSB so throw away everything before that */
2299 if (!intel_pt_next_psb(&buf_b, &len_b))
2300 return buf_b + len_b; /* No PSB */
2301
2302 if (!intel_pt_next_psb(&buf_a, &len_a))
2303 return buf_b; /* No overlap */
2304
2305 if (have_tsc) {
2306 found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
2307 if (found)
2308 return found;
2309 }
2310
2311 /*
2312 * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
2313 * we can ignore the first part of buffer 'a'.
2314 */
2315 while (len_b < len_a) {
2316 if (!intel_pt_step_psb(&buf_a, &len_a))
2317 return buf_b; /* No overlap */
2318 }
2319
2320 /* Now len_b >= len_a */
2321 if (len_b > len_a) {
2322 /* The leftover buffer 'b' must start at a PSB */
2323 while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
2324 if (!intel_pt_step_psb(&buf_a, &len_a))
2325 return buf_b; /* No overlap */
2326 }
2327 }
2328
2329 while (1) {
2330 /* Potential overlap so check the bytes */
2331 found = memmem(buf_a, len_a, buf_b, len_a);
2332 if (found)
2333 return buf_b + len_a;
2334
2335 /* Try again at next PSB in buffer 'a' */
2336 if (!intel_pt_step_psb(&buf_a, &len_a))
2337 return buf_b; /* No overlap */
2338
2339 /* The leftover buffer 'b' must start at a PSB */
2340 while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
2341 if (!intel_pt_step_psb(&buf_a, &len_a))
2342 return buf_b; /* No overlap */
2343 }
2344 }
2345}
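/*
 * Editor's usage sketch (hypothetical caller): stitching two snapshot buffers
 * that may overlap:
 *
 *	unsigned char *start;
 *
 *	start = intel_pt_find_overlap(buf_a, len_a, buf_b, len_b, have_tsc);
 *	// decode buffer 'b' from 'start' onwards; start == buf_b + len_b
 *	// means buffer 'b' holds no new PSB-aligned (decodable) data
 */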
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
new file mode 100644
index 000000000000..02c38fec1c37
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -0,0 +1,109 @@
1/*
2 * intel_pt_decoder.h: Intel Processor Trace support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef INCLUDE__INTEL_PT_DECODER_H__
17#define INCLUDE__INTEL_PT_DECODER_H__
18
19#include <stdint.h>
20#include <stddef.h>
21#include <stdbool.h>
22
23#include "intel-pt-insn-decoder.h"
24
25#define INTEL_PT_IN_TX (1 << 0)
26#define INTEL_PT_ABORT_TX (1 << 1)
27#define INTEL_PT_ASYNC (1 << 2)
28
29enum intel_pt_sample_type {
30 INTEL_PT_BRANCH = 1 << 0,
31 INTEL_PT_INSTRUCTION = 1 << 1,
32 INTEL_PT_TRANSACTION = 1 << 2,
33};
34
35enum intel_pt_period_type {
36 INTEL_PT_PERIOD_NONE,
37 INTEL_PT_PERIOD_INSTRUCTIONS,
38 INTEL_PT_PERIOD_TICKS,
39 INTEL_PT_PERIOD_MTC,
40};
41
42enum {
43 INTEL_PT_ERR_NOMEM = 1,
44 INTEL_PT_ERR_INTERN,
45 INTEL_PT_ERR_BADPKT,
46 INTEL_PT_ERR_NODATA,
47 INTEL_PT_ERR_NOINSN,
48 INTEL_PT_ERR_MISMAT,
49 INTEL_PT_ERR_OVR,
50 INTEL_PT_ERR_LOST,
51 INTEL_PT_ERR_UNK,
52 INTEL_PT_ERR_NELOOP,
53 INTEL_PT_ERR_MAX,
54};
55
56struct intel_pt_state {
57 enum intel_pt_sample_type type;
58 int err;
59 uint64_t from_ip;
60 uint64_t to_ip;
61 uint64_t cr3;
62 uint64_t tot_insn_cnt;
63 uint64_t timestamp;
64 uint64_t est_timestamp;
65 uint64_t trace_nr;
66 uint32_t flags;
67 enum intel_pt_insn_op insn_op;
68 int insn_len;
69};
70
71struct intel_pt_insn;
72
73struct intel_pt_buffer {
74 const unsigned char *buf;
75 size_t len;
76 bool consecutive;
77 uint64_t ref_timestamp;
78 uint64_t trace_nr;
79};
80
81struct intel_pt_params {
82 int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
83 int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
84 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
85 uint64_t max_insn_cnt, void *data);
86 void *data;
87 bool return_compression;
88 uint64_t period;
89 enum intel_pt_period_type period_type;
90 unsigned max_non_turbo_ratio;
91 unsigned int mtc_period;
92 uint32_t tsc_ctc_ratio_n;
93 uint32_t tsc_ctc_ratio_d;
94};
95
96struct intel_pt_decoder;
97
98struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
99void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
100
101const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
102
103unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
104 unsigned char *buf_b, size_t len_b,
105 bool have_tsc);
106
107int intel_pt__strerror(int code, char *buf, size_t buflen);
108
109#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
new file mode 100644
index 000000000000..9e4eb8fcd559
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -0,0 +1,246 @@
1/*
2 * intel_pt_insn_decoder.c: Intel Processor Trace support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <stdio.h>
17#include <string.h>
18#include <endian.h>
19#include <byteswap.h>
20
21#include "event.h"
22
23#include "insn.h"
24
25#include "inat.c"
26#include "insn.c"
27
28#include "intel-pt-insn-decoder.h"
29
30/* Based on branch_type() from perf_event_intel_lbr.c */
31static void intel_pt_insn_decoder(struct insn *insn,
32 struct intel_pt_insn *intel_pt_insn)
33{
34 enum intel_pt_insn_op op = INTEL_PT_OP_OTHER;
35 enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH;
36 int ext;
37
38 if (insn_is_avx(insn)) {
39 intel_pt_insn->op = INTEL_PT_OP_OTHER;
40 intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
41 intel_pt_insn->length = insn->length;
42 return;
43 }
44
45 switch (insn->opcode.bytes[0]) {
46 case 0xf:
47 switch (insn->opcode.bytes[1]) {
48 case 0x05: /* syscall */
49 case 0x34: /* sysenter */
50 op = INTEL_PT_OP_SYSCALL;
51 branch = INTEL_PT_BR_INDIRECT;
52 break;
53 case 0x07: /* sysret */
54 case 0x35: /* sysexit */
55 op = INTEL_PT_OP_SYSRET;
56 branch = INTEL_PT_BR_INDIRECT;
57 break;
58 case 0x80 ... 0x8f: /* jcc */
59 op = INTEL_PT_OP_JCC;
60 branch = INTEL_PT_BR_CONDITIONAL;
61 break;
62 default:
63 break;
64 }
65 break;
66 case 0x70 ... 0x7f: /* jcc */
67 op = INTEL_PT_OP_JCC;
68 branch = INTEL_PT_BR_CONDITIONAL;
69 break;
70 case 0xc2: /* near ret */
71 case 0xc3: /* near ret */
72 case 0xca: /* far ret */
73 case 0xcb: /* far ret */
74 op = INTEL_PT_OP_RET;
75 branch = INTEL_PT_BR_INDIRECT;
76 break;
77 case 0xcf: /* iret */
78 op = INTEL_PT_OP_IRET;
79 branch = INTEL_PT_BR_INDIRECT;
80 break;
81 case 0xcc ... 0xce: /* int */
82 op = INTEL_PT_OP_INT;
83 branch = INTEL_PT_BR_INDIRECT;
84 break;
85 case 0xe8: /* call near rel */
86 op = INTEL_PT_OP_CALL;
87 branch = INTEL_PT_BR_UNCONDITIONAL;
88 break;
89 case 0x9a: /* call far absolute */
90 op = INTEL_PT_OP_CALL;
91 branch = INTEL_PT_BR_INDIRECT;
92 break;
93 case 0xe0 ... 0xe2: /* loop */
94 op = INTEL_PT_OP_LOOP;
95 branch = INTEL_PT_BR_CONDITIONAL;
96 break;
97 case 0xe3: /* jcc */
98 op = INTEL_PT_OP_JCC;
99 branch = INTEL_PT_BR_CONDITIONAL;
100 break;
101 case 0xe9: /* jmp */
102 case 0xeb: /* jmp */
103 op = INTEL_PT_OP_JMP;
104 branch = INTEL_PT_BR_UNCONDITIONAL;
105 break;
106 case 0xea: /* far jmp */
107 op = INTEL_PT_OP_JMP;
108 branch = INTEL_PT_BR_INDIRECT;
109 break;
110 case 0xff: /* call near absolute, call far absolute ind */
111 ext = (insn->modrm.bytes[0] >> 3) & 0x7;
112 switch (ext) {
113 case 2: /* near ind call */
114 case 3: /* far ind call */
115 op = INTEL_PT_OP_CALL;
116 branch = INTEL_PT_BR_INDIRECT;
117 break;
118 case 4:
119 case 5:
120 op = INTEL_PT_OP_JMP;
121 branch = INTEL_PT_BR_INDIRECT;
122 break;
123 default:
124 break;
125 }
126 break;
127 default:
128 break;
129 }
130
131 intel_pt_insn->op = op;
132 intel_pt_insn->branch = branch;
133 intel_pt_insn->length = insn->length;
134
135 if (branch == INTEL_PT_BR_CONDITIONAL ||
136 branch == INTEL_PT_BR_UNCONDITIONAL) {
137#if __BYTE_ORDER == __BIG_ENDIAN
138 switch (insn->immediate.nbytes) {
139 case 1:
140 intel_pt_insn->rel = insn->immediate.value;
141 break;
142 case 2:
143 intel_pt_insn->rel =
144 bswap_16((short)insn->immediate.value);
145 break;
146 case 4:
147 intel_pt_insn->rel = bswap_32(insn->immediate.value);
148 break;
149 }
150#else
151 intel_pt_insn->rel = insn->immediate.value;
152#endif
153 }
154}
155
156int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
157 struct intel_pt_insn *intel_pt_insn)
158{
159 struct insn insn;
160
161 insn_init(&insn, buf, len, x86_64);
162 insn_get_length(&insn);
163 if (!insn_complete(&insn) || insn.length > len)
164 return -1;
165 intel_pt_insn_decoder(&insn, intel_pt_insn);
166 if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ)
167 memcpy(intel_pt_insn->buf, buf, insn.length);
168 else
169 memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ);
170 return 0;
171}
172
173const char *branch_name[] = {
174 [INTEL_PT_OP_OTHER] = "Other",
175 [INTEL_PT_OP_CALL] = "Call",
176 [INTEL_PT_OP_RET] = "Ret",
177 [INTEL_PT_OP_JCC] = "Jcc",
178 [INTEL_PT_OP_JMP] = "Jmp",
179 [INTEL_PT_OP_LOOP] = "Loop",
180 [INTEL_PT_OP_IRET] = "IRet",
181 [INTEL_PT_OP_INT] = "Int",
182 [INTEL_PT_OP_SYSCALL] = "Syscall",
183 [INTEL_PT_OP_SYSRET] = "Sysret",
184};
185
186const char *intel_pt_insn_name(enum intel_pt_insn_op op)
187{
188 return branch_name[op];
189}
190
191int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
192 size_t buf_len)
193{
194 switch (intel_pt_insn->branch) {
195 case INTEL_PT_BR_CONDITIONAL:
196 case INTEL_PT_BR_UNCONDITIONAL:
197 return snprintf(buf, buf_len, "%s %s%d",
198 intel_pt_insn_name(intel_pt_insn->op),
199 intel_pt_insn->rel > 0 ? "+" : "",
200 intel_pt_insn->rel);
201 case INTEL_PT_BR_NO_BRANCH:
202 case INTEL_PT_BR_INDIRECT:
203 return snprintf(buf, buf_len, "%s",
204 intel_pt_insn_name(intel_pt_insn->op));
205 default:
206 break;
207 }
208 return 0;
209}
210
211size_t intel_pt_insn_max_size(void)
212{
213 return MAX_INSN_SIZE;
214}
215
216int intel_pt_insn_type(enum intel_pt_insn_op op)
217{
218 switch (op) {
219 case INTEL_PT_OP_OTHER:
220 return 0;
221 case INTEL_PT_OP_CALL:
222 return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL;
223 case INTEL_PT_OP_RET:
224 return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN;
225 case INTEL_PT_OP_JCC:
226 return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
227 case INTEL_PT_OP_JMP:
228 return PERF_IP_FLAG_BRANCH;
229 case INTEL_PT_OP_LOOP:
230 return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
231 case INTEL_PT_OP_IRET:
232 return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
233 PERF_IP_FLAG_INTERRUPT;
234 case INTEL_PT_OP_INT:
235 return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
236 PERF_IP_FLAG_INTERRUPT;
237 case INTEL_PT_OP_SYSCALL:
238 return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
239 PERF_IP_FLAG_SYSCALLRET;
240 case INTEL_PT_OP_SYSRET:
241 return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
242 PERF_IP_FLAG_SYSCALLRET;
243 default:
244 return 0;
245 }
246}
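
To make the classification above concrete: a hypothetical standalone caller can feed raw object-code bytes to intel_pt_get_insn(). A 64-bit near call, opcode 0xe8 with a little-endian rel32 of 0x10, comes back as an unconditional CALL with rel = +16 (the byte values are invented for the example, and the printed result assumes a little-endian host):

    #include <stdio.h>

    #include "intel-pt-insn-decoder.h"

    int main(void)
    {
            /* e8 10 00 00 00 = call near, rel32 = 0x10 */
            const unsigned char code[] = { 0xe8, 0x10, 0x00, 0x00, 0x00 };
            struct intel_pt_insn insn;
            char desc[INTEL_PT_INSN_DESC_MAX];

            if (intel_pt_get_insn(code, sizeof(code), 1 /* x86_64 */, &insn))
                    return 1;
            intel_pt_insn_desc(&insn, desc, sizeof(desc));
            printf("%s, length %d\n", desc, insn.length); /* Call +16, length 5 */
            return 0;
    }
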
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
new file mode 100644
index 000000000000..b0adbf37323e
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -0,0 +1,65 @@
1/*
2 * intel_pt_insn_decoder.h: Intel Processor Trace support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__
17#define INCLUDE__INTEL_PT_INSN_DECODER_H__
18
19#include <stddef.h>
20#include <stdint.h>
21
22#define INTEL_PT_INSN_DESC_MAX 32
23#define INTEL_PT_INSN_DBG_BUF_SZ 16
24
25enum intel_pt_insn_op {
26 INTEL_PT_OP_OTHER,
27 INTEL_PT_OP_CALL,
28 INTEL_PT_OP_RET,
29 INTEL_PT_OP_JCC,
30 INTEL_PT_OP_JMP,
31 INTEL_PT_OP_LOOP,
32 INTEL_PT_OP_IRET,
33 INTEL_PT_OP_INT,
34 INTEL_PT_OP_SYSCALL,
35 INTEL_PT_OP_SYSRET,
36};
37
38enum intel_pt_insn_branch {
39 INTEL_PT_BR_NO_BRANCH,
40 INTEL_PT_BR_INDIRECT,
41 INTEL_PT_BR_CONDITIONAL,
42 INTEL_PT_BR_UNCONDITIONAL,
43};
44
45struct intel_pt_insn {
46 enum intel_pt_insn_op op;
47 enum intel_pt_insn_branch branch;
48 int length;
49 int32_t rel;
50 unsigned char buf[INTEL_PT_INSN_DBG_BUF_SZ];
51};
52
53int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
54 struct intel_pt_insn *intel_pt_insn);
55
56const char *intel_pt_insn_name(enum intel_pt_insn_op op);
57
58int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
59 size_t buf_len);
60
61size_t intel_pt_insn_max_size(void);
62
63int intel_pt_insn_type(enum intel_pt_insn_op op);
64
65#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
new file mode 100644
index 000000000000..d09c7d9f9050
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -0,0 +1,155 @@
1/*
2 * intel_pt_log.c: Intel Processor Trace support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <stdio.h>
17#include <stdint.h>
18#include <inttypes.h>
19#include <stdarg.h>
20#include <stdbool.h>
21#include <string.h>
22
23#include "intel-pt-log.h"
24#include "intel-pt-insn-decoder.h"
25
26#include "intel-pt-pkt-decoder.h"
27
28#define MAX_LOG_NAME 256
29
30static FILE *f;
31static char log_name[MAX_LOG_NAME];
32static bool enable_logging;
33
34void intel_pt_log_enable(void)
35{
36 enable_logging = true;
37}
38
39void intel_pt_log_disable(void)
40{
41 if (f)
42 fflush(f);
43 enable_logging = false;
44}
45
46void intel_pt_log_set_name(const char *name)
47{
48 snprintf(log_name, MAX_LOG_NAME - 4, "%s", name);
49 strcat(log_name, ".log");
50}
51
52static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos,
53 int indent)
54{
55 int i;
56
57 for (i = 0; i < indent; i++)
58 fprintf(f, " ");
59
60 fprintf(f, " %08" PRIx64 ": ", pos);
61 for (i = 0; i < len; i++)
62 fprintf(f, " %02x", buf[i]);
63 for (; i < 16; i++)
64 fprintf(f, " ");
65 fprintf(f, " ");
66}
67
68static void intel_pt_print_no_data(uint64_t pos, int indent)
69{
70 int i;
71
72 for (i = 0; i < indent; i++)
73 fprintf(f, " ");
74
75 fprintf(f, " %08" PRIx64 ": ", pos);
76 for (i = 0; i < 16; i++)
77 fprintf(f, " ");
78 fprintf(f, " ");
79}
80
81static int intel_pt_log_open(void)
82{
83 if (!enable_logging)
84 return -1;
85
86 if (f)
87 return 0;
88
89 if (!log_name[0])
90 return -1;
91
92 f = fopen(log_name, "w+");
93 if (!f) {
94 enable_logging = false;
95 return -1;
96 }
97
98 return 0;
99}
100
101void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
102 uint64_t pos, const unsigned char *buf)
103{
104 char desc[INTEL_PT_PKT_DESC_MAX];
105
106 if (intel_pt_log_open())
107 return;
108
109 intel_pt_print_data(buf, pkt_len, pos, 0);
110 intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX);
111 fprintf(f, "%s\n", desc);
112}
113
114void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
115{
116 char desc[INTEL_PT_INSN_DESC_MAX];
117 size_t len = intel_pt_insn->length;
118
119 if (intel_pt_log_open())
120 return;
121
122 if (len > INTEL_PT_INSN_DBG_BUF_SZ)
123 len = INTEL_PT_INSN_DBG_BUF_SZ;
124 intel_pt_print_data(intel_pt_insn->buf, len, ip, 8);
125 if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
126 fprintf(f, "%s\n", desc);
127 else
128 fprintf(f, "Bad instruction!\n");
129}
130
131void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
132{
133 char desc[INTEL_PT_INSN_DESC_MAX];
134
135 if (intel_pt_log_open())
136 return;
137
138 intel_pt_print_no_data(ip, 8);
139 if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
140 fprintf(f, "%s\n", desc);
141 else
142 fprintf(f, "Bad instruction!\n");
143}
144
145void intel_pt_log(const char *fmt, ...)
146{
147 va_list args;
148
149 if (intel_pt_log_open())
150 return;
151
152 va_start(args, fmt);
153 vfprintf(f, fmt, args);
154 va_end(args);
155}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
new file mode 100644
index 000000000000..db3942f83677
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -0,0 +1,52 @@
1/*
2 * intel_pt_log.h: Intel Processor Trace support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef INCLUDE__INTEL_PT_LOG_H__
17#define INCLUDE__INTEL_PT_LOG_H__
18
19#include <stdint.h>
20#include <inttypes.h>
21
22struct intel_pt_pkt;
23
24void intel_pt_log_enable(void);
25void intel_pt_log_disable(void);
26void intel_pt_log_set_name(const char *name);
27
28void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
29 uint64_t pos, const unsigned char *buf);
30
31struct intel_pt_insn;
32
33void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
34void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
35 uint64_t ip);
36
37__attribute__((format(printf, 1, 2)))
38void intel_pt_log(const char *fmt, ...);
39
40#define x64_fmt "0x%" PRIx64
41
42static inline void intel_pt_log_at(const char *msg, uint64_t u)
43{
44 intel_pt_log("%s at " x64_fmt "\n", msg, u);
45}
46
47static inline void intel_pt_log_to(const char *msg, uint64_t u)
48{
49 intel_pt_log("%s to " x64_fmt "\n", msg, u);
50}
51
52#endif
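
Note that the logger is deliberately lazy: intel_pt_log_open() only creates the file on the first message after a name has been set and logging enabled. A short usage sketch (the log name is an invented example):

    #include "intel-pt-log.h"

    void enable_decoder_debug(void)
    {
            intel_pt_log_set_name("intel_pt_debug"); /* ".log" is appended */
            intel_pt_log_enable();

            intel_pt_log("decoder initialised\n");
            intel_pt_log_at("PSB", 0x1000);          /* "PSB at 0x1000" */
    }
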
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
new file mode 100644
index 000000000000..b1257c816310
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -0,0 +1,518 @@
1/*
2 * intel_pt_pkt_decoder.c: Intel Processor Trace support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <stdio.h>
17#include <string.h>
18#include <endian.h>
19#include <byteswap.h>
20
21#include "intel-pt-pkt-decoder.h"
22
23#define BIT(n) (1 << (n))
24
25#define BIT63 ((uint64_t)1 << 63)
26
27#define NR_FLAG BIT63
28
29#if __BYTE_ORDER == __BIG_ENDIAN
30#define le16_to_cpu bswap_16
31#define le32_to_cpu bswap_32
32#define le64_to_cpu bswap_64
33#define memcpy_le64(d, s, n) do { \
34 memcpy((d), (s), (n)); \
35 *(d) = le64_to_cpu(*(d)); \
36} while (0)
37#else
38#define le16_to_cpu
39#define le32_to_cpu
40#define le64_to_cpu
41#define memcpy_le64 memcpy
42#endif
43
44static const char * const packet_name[] = {
45 [INTEL_PT_BAD] = "Bad Packet!",
46 [INTEL_PT_PAD] = "PAD",
47 [INTEL_PT_TNT] = "TNT",
48 [INTEL_PT_TIP_PGD] = "TIP.PGD",
49 [INTEL_PT_TIP_PGE] = "TIP.PGE",
50 [INTEL_PT_TSC] = "TSC",
51 [INTEL_PT_TMA] = "TMA",
52 [INTEL_PT_MODE_EXEC] = "MODE.Exec",
53 [INTEL_PT_MODE_TSX] = "MODE.TSX",
54 [INTEL_PT_MTC] = "MTC",
55 [INTEL_PT_TIP] = "TIP",
56 [INTEL_PT_FUP] = "FUP",
57 [INTEL_PT_CYC] = "CYC",
58 [INTEL_PT_VMCS] = "VMCS",
59 [INTEL_PT_PSB] = "PSB",
60 [INTEL_PT_PSBEND] = "PSBEND",
61 [INTEL_PT_CBR] = "CBR",
62 [INTEL_PT_TRACESTOP] = "TraceSTOP",
63 [INTEL_PT_PIP] = "PIP",
64 [INTEL_PT_OVF] = "OVF",
65 [INTEL_PT_MNT] = "MNT",
66};
67
68const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
69{
70 return packet_name[type];
71}
72
73static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len,
74 struct intel_pt_pkt *packet)
75{
76 uint64_t payload;
77 int count;
78
79 if (len < 8)
80 return INTEL_PT_NEED_MORE_BYTES;
81
82 payload = le64_to_cpu(*(uint64_t *)buf);
83
84 for (count = 47; count; count--) {
85 if (payload & BIT63)
86 break;
87 payload <<= 1;
88 }
89
90 packet->type = INTEL_PT_TNT;
91 packet->count = count;
92 packet->payload = payload << 1;
93 return 8;
94}
95
96static int intel_pt_get_pip(const unsigned char *buf, size_t len,
97 struct intel_pt_pkt *packet)
98{
99 uint64_t payload = 0;
100
101 if (len < 8)
102 return INTEL_PT_NEED_MORE_BYTES;
103
104 packet->type = INTEL_PT_PIP;
105 memcpy_le64(&payload, buf + 2, 6);
106 packet->payload = payload >> 1;
107 if (payload & 1)
108 packet->payload |= NR_FLAG;
109
110 return 8;
111}
112
113static int intel_pt_get_tracestop(struct intel_pt_pkt *packet)
114{
115 packet->type = INTEL_PT_TRACESTOP;
116 return 2;
117}
118
119static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
120 struct intel_pt_pkt *packet)
121{
122 if (len < 4)
123 return INTEL_PT_NEED_MORE_BYTES;
124 packet->type = INTEL_PT_CBR;
125 packet->payload = buf[2];
126 return 4;
127}
128
129static int intel_pt_get_vmcs(const unsigned char *buf, size_t len,
130 struct intel_pt_pkt *packet)
131{
132 unsigned int count = (52 - 5) >> 3;
133
134 if (count < 1 || count > 7)
135 return INTEL_PT_BAD_PACKET;
136
137 if (len < count + 2)
138 return INTEL_PT_NEED_MORE_BYTES;
139
140 packet->type = INTEL_PT_VMCS;
141 packet->count = count;
142 memcpy_le64(&packet->payload, buf + 2, count);
143
144 return count + 2;
145}
146
147static int intel_pt_get_ovf(struct intel_pt_pkt *packet)
148{
149 packet->type = INTEL_PT_OVF;
150 return 2;
151}
152
153static int intel_pt_get_psb(const unsigned char *buf, size_t len,
154 struct intel_pt_pkt *packet)
155{
156 int i;
157
158 if (len < 16)
159 return INTEL_PT_NEED_MORE_BYTES;
160
161 for (i = 2; i < 16; i += 2) {
162 if (buf[i] != 2 || buf[i + 1] != 0x82)
163 return INTEL_PT_BAD_PACKET;
164 }
165
166 packet->type = INTEL_PT_PSB;
167 return 16;
168}
169
170static int intel_pt_get_psbend(struct intel_pt_pkt *packet)
171{
172 packet->type = INTEL_PT_PSBEND;
173 return 2;
174}
175
176static int intel_pt_get_tma(const unsigned char *buf, size_t len,
177 struct intel_pt_pkt *packet)
178{
179 if (len < 7)
180 return INTEL_PT_NEED_MORE_BYTES;
181
182 packet->type = INTEL_PT_TMA;
183 packet->payload = buf[2] | (buf[3] << 8);
184 packet->count = buf[5] | ((buf[6] & BIT(0)) << 8);
185 return 7;
186}
187
188static int intel_pt_get_pad(struct intel_pt_pkt *packet)
189{
190 packet->type = INTEL_PT_PAD;
191 return 1;
192}
193
194static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
195 struct intel_pt_pkt *packet)
196{
197 if (len < 11)
198 return INTEL_PT_NEED_MORE_BYTES;
199 packet->type = INTEL_PT_MNT;
200 memcpy_le64(&packet->payload, buf + 3, 8);
201 return 11;
203}
204
205static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
206 struct intel_pt_pkt *packet)
207{
208 if (len < 3)
209 return INTEL_PT_NEED_MORE_BYTES;
210
211 switch (buf[2]) {
212 case 0x88: /* MNT */
213 return intel_pt_get_mnt(buf, len, packet);
214 default:
215 return INTEL_PT_BAD_PACKET;
216 }
217}
218
219static int intel_pt_get_ext(const unsigned char *buf, size_t len,
220 struct intel_pt_pkt *packet)
221{
222 if (len < 2)
223 return INTEL_PT_NEED_MORE_BYTES;
224
225 switch (buf[1]) {
226 case 0xa3: /* Long TNT */
227 return intel_pt_get_long_tnt(buf, len, packet);
228 case 0x43: /* PIP */
229 return intel_pt_get_pip(buf, len, packet);
230 case 0x83: /* TraceStop */
231 return intel_pt_get_tracestop(packet);
232 case 0x03: /* CBR */
233 return intel_pt_get_cbr(buf, len, packet);
234 case 0xc8: /* VMCS */
235 return intel_pt_get_vmcs(buf, len, packet);
236 case 0xf3: /* OVF */
237 return intel_pt_get_ovf(packet);
238 case 0x82: /* PSB */
239 return intel_pt_get_psb(buf, len, packet);
240 case 0x23: /* PSBEND */
241 return intel_pt_get_psbend(packet);
242 case 0x73: /* TMA */
243 return intel_pt_get_tma(buf, len, packet);
244 case 0xc3: /* 3-byte header */
245 return intel_pt_get_3byte(buf, len, packet);
246 default:
247 return INTEL_PT_BAD_PACKET;
248 }
249}
250
251static int intel_pt_get_short_tnt(unsigned int byte,
252 struct intel_pt_pkt *packet)
253{
254 int count;
255
256 for (count = 6; count; count--) {
257 if (byte & BIT(7))
258 break;
259 byte <<= 1;
260 }
261
262 packet->type = INTEL_PT_TNT;
263 packet->count = count;
264 packet->payload = (uint64_t)byte << 57;
265
266 return 1;
267}
268
269static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
270 size_t len, struct intel_pt_pkt *packet)
271{
272 unsigned int offs = 1, shift;
273 uint64_t payload = byte >> 3;
274
275 byte >>= 2;
276 len -= 1;
277 for (shift = 5; byte & 1; shift += 7) {
278 if (offs > 9)
279 return INTEL_PT_BAD_PACKET;
280 if (len < offs)
281 return INTEL_PT_NEED_MORE_BYTES;
282 byte = buf[offs++];
283 payload |= (byte >> 1) << shift;
284 }
285
286 packet->type = INTEL_PT_CYC;
287 packet->payload = payload;
288 return offs;
289}
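
The CYC payload is thus a little-endian variable-length quantity: bits 7:3 of the first byte carry the low five value bits with bit 2 as the continuation flag, and each further byte carries seven value bits (7:1) with bit 0 as the continuation flag. A worked example (bytes invented) of a two-byte CYC packet:

    0x0f 0x04
      0x0f = 00001 1 11 : CYC header (bits 1:0), continuation bit 2 set,
                          value bits 7:3 = 00001
      0x04 = 0000010 0  : continuation bit 0 clear, value bits 7:1 = 0000010
      payload = 0000010 00001 (binary) = 0x41, 2 bytes consumed
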
290
291static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
292 const unsigned char *buf, size_t len,
293 struct intel_pt_pkt *packet)
294{
295 switch (byte >> 5) {
296 case 0:
297 packet->count = 0;
298 break;
299 case 1:
300 if (len < 3)
301 return INTEL_PT_NEED_MORE_BYTES;
302 packet->count = 2;
303 packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
304 break;
305 case 2:
306 if (len < 5)
307 return INTEL_PT_NEED_MORE_BYTES;
308 packet->count = 4;
309 packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
310 break;
311 case 3:
312 case 6:
313 if (len < 7)
314 return INTEL_PT_NEED_MORE_BYTES;
315 packet->count = 6;
316 memcpy_le64(&packet->payload, buf + 1, 6);
317 break;
318 default:
319 return INTEL_PT_BAD_PACKET;
320 }
321
322 packet->type = type;
323
324 return packet->count + 1;
325}
326
327static int intel_pt_get_mode(const unsigned char *buf, size_t len,
328 struct intel_pt_pkt *packet)
329{
330 if (len < 2)
331 return INTEL_PT_NEED_MORE_BYTES;
332
333 switch (buf[1] >> 5) {
334 case 0:
335 packet->type = INTEL_PT_MODE_EXEC;
336 switch (buf[1] & 3) {
337 case 0:
338 packet->payload = 16;
339 break;
340 case 1:
341 packet->payload = 64;
342 break;
343 case 2:
344 packet->payload = 32;
345 break;
346 default:
347 return INTEL_PT_BAD_PACKET;
348 }
349 break;
350 case 1:
351 packet->type = INTEL_PT_MODE_TSX;
352 if ((buf[1] & 3) == 3)
353 return INTEL_PT_BAD_PACKET;
354 packet->payload = buf[1] & 3;
355 break;
356 default:
357 return INTEL_PT_BAD_PACKET;
358 }
359
360 return 2;
361}
362
363static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
364 struct intel_pt_pkt *packet)
365{
366 if (len < 8)
367 return INTEL_PT_NEED_MORE_BYTES;
368 packet->type = INTEL_PT_TSC;
369 memcpy_le64(&packet->payload, buf + 1, 7);
370 return 8;
371}
372
373static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
374 struct intel_pt_pkt *packet)
375{
376 if (len < 2)
377 return INTEL_PT_NEED_MORE_BYTES;
378 packet->type = INTEL_PT_MTC;
379 packet->payload = buf[1];
380 return 2;
381}
382
383static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
384 struct intel_pt_pkt *packet)
385{
386 unsigned int byte;
387
388 memset(packet, 0, sizeof(struct intel_pt_pkt));
389
390 if (!len)
391 return INTEL_PT_NEED_MORE_BYTES;
392
393 byte = buf[0];
394 if (!(byte & BIT(0))) {
395 if (byte == 0)
396 return intel_pt_get_pad(packet);
397 if (byte == 2)
398 return intel_pt_get_ext(buf, len, packet);
399 return intel_pt_get_short_tnt(byte, packet);
400 }
401
402 if ((byte & 2))
403 return intel_pt_get_cyc(byte, buf, len, packet);
404
405 switch (byte & 0x1f) {
406 case 0x0D:
407 return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet);
408 case 0x11:
409 return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len,
410 packet);
411 case 0x01:
412 return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len,
413 packet);
414 case 0x1D:
415 return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet);
416 case 0x19:
417 switch (byte) {
418 case 0x99:
419 return intel_pt_get_mode(buf, len, packet);
420 case 0x19:
421 return intel_pt_get_tsc(buf, len, packet);
422 case 0x59:
423 return intel_pt_get_mtc(buf, len, packet);
424 default:
425 return INTEL_PT_BAD_PACKET;
426 }
427 default:
428 return INTEL_PT_BAD_PACKET;
429 }
430}
431
432int intel_pt_get_packet(const unsigned char *buf, size_t len,
433 struct intel_pt_pkt *packet)
434{
435 int ret;
436
437 ret = intel_pt_do_get_packet(buf, len, packet);
438 if (ret > 0) {
439 while (ret < 8 && len > (size_t)ret && !buf[ret])
440 ret += 1;
441 }
442 return ret;
443}
444
445int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
446 size_t buf_len)
447{
448 int ret, i, nr;
449 unsigned long long payload = packet->payload;
450 const char *name = intel_pt_pkt_name(packet->type);
451
452 switch (packet->type) {
453 case INTEL_PT_BAD:
454 case INTEL_PT_PAD:
455 case INTEL_PT_PSB:
456 case INTEL_PT_PSBEND:
457 case INTEL_PT_TRACESTOP:
458 case INTEL_PT_OVF:
459 return snprintf(buf, buf_len, "%s", name);
460 case INTEL_PT_TNT: {
461 size_t blen = buf_len;
462
463 ret = snprintf(buf, blen, "%s ", name);
464 if (ret < 0)
465 return ret;
466 buf += ret;
467 blen -= ret;
468 for (i = 0; i < packet->count; i++) {
469 if (payload & BIT63)
470 ret = snprintf(buf, blen, "T");
471 else
472 ret = snprintf(buf, blen, "N");
473 if (ret < 0)
474 return ret;
475 buf += ret;
476 blen -= ret;
477 payload <<= 1;
478 }
479 ret = snprintf(buf, blen, " (%d)", packet->count);
480 if (ret < 0)
481 return ret;
482 blen -= ret;
483 return buf_len - blen;
484 }
485 case INTEL_PT_TIP_PGD:
486 case INTEL_PT_TIP_PGE:
487 case INTEL_PT_TIP:
488 case INTEL_PT_FUP:
489 if (!(packet->count))
490 return snprintf(buf, buf_len, "%s no ip", name);
491 case INTEL_PT_CYC:
492 case INTEL_PT_VMCS:
493 case INTEL_PT_MTC:
494 case INTEL_PT_MNT:
495 case INTEL_PT_CBR:
496 case INTEL_PT_TSC:
497 return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
498 case INTEL_PT_TMA:
499 return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
500 (unsigned)payload, packet->count);
501 case INTEL_PT_MODE_EXEC:
502 return snprintf(buf, buf_len, "%s %lld", name, payload);
503 case INTEL_PT_MODE_TSX:
504 return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
505 name, (unsigned)(payload >> 1) & 1,
506 (unsigned)payload & 1);
507 case INTEL_PT_PIP:
508 nr = packet->payload & NR_FLAG ? 1 : 0;
509 payload &= ~NR_FLAG;
510 ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
511 name, payload, nr);
512 return ret;
513 default:
514 break;
515 }
516 return snprintf(buf, buf_len, "%s 0x%llx (%d)",
517 name, payload, packet->count);
518}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
new file mode 100644
index 000000000000..781bb79883bd
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -0,0 +1,70 @@
1/*
2 * intel_pt_pkt_decoder.h: Intel Processor Trace support
3 * Copyright (c) 2013-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__
17#define INCLUDE__INTEL_PT_PKT_DECODER_H__
18
19#include <stddef.h>
20#include <stdint.h>
21
22#define INTEL_PT_PKT_DESC_MAX 256
23
24#define INTEL_PT_NEED_MORE_BYTES -1
25#define INTEL_PT_BAD_PACKET -2
26
27#define INTEL_PT_PSB_STR "\002\202\002\202\002\202\002\202" \
28 "\002\202\002\202\002\202\002\202"
29#define INTEL_PT_PSB_LEN 16
30
31#define INTEL_PT_PKT_MAX_SZ 16
32
33enum intel_pt_pkt_type {
34 INTEL_PT_BAD,
35 INTEL_PT_PAD,
36 INTEL_PT_TNT,
37 INTEL_PT_TIP_PGD,
38 INTEL_PT_TIP_PGE,
39 INTEL_PT_TSC,
40 INTEL_PT_TMA,
41 INTEL_PT_MODE_EXEC,
42 INTEL_PT_MODE_TSX,
43 INTEL_PT_MTC,
44 INTEL_PT_TIP,
45 INTEL_PT_FUP,
46 INTEL_PT_CYC,
47 INTEL_PT_VMCS,
48 INTEL_PT_PSB,
49 INTEL_PT_PSBEND,
50 INTEL_PT_CBR,
51 INTEL_PT_TRACESTOP,
52 INTEL_PT_PIP,
53 INTEL_PT_OVF,
54 INTEL_PT_MNT,
55};
56
57struct intel_pt_pkt {
58 enum intel_pt_pkt_type type;
59 int count;
60 uint64_t payload;
61};
62
63const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
64
65int intel_pt_get_packet(const unsigned char *buf, size_t len,
66 struct intel_pt_pkt *packet);
67
68int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
69
70#endif
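
intel_pt_get_packet() returns the number of bytes consumed on success, or one of the negative codes above (INTEL_PT_NEED_MORE_BYTES, INTEL_PT_BAD_PACKET) on failure. A hypothetical dumper that walks a captured buffer with this API, skipping one byte to resynchronize on failure, might look like this (dump_packets() is an invented helper; intel_pt_dump() in intel-pt.c below does much the same):

    #include <stdio.h>

    #include "intel-pt-pkt-decoder.h"

    void dump_packets(const unsigned char *buf, size_t len)
    {
            struct intel_pt_pkt packet;
            char desc[INTEL_PT_PKT_DESC_MAX];
            int ret;

            while (len) {
                    ret = intel_pt_get_packet(buf, len, &packet);
                    if (ret <= 0) {         /* bad or truncated packet */
                            buf += 1;
                            len -= 1;
                            continue;
                    }
                    intel_pt_pkt_desc(&packet, desc, sizeof(desc));
                    printf("%s\n", desc);
                    buf += ret;
                    len -= ret;
            }
    }
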
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
new file mode 100644
index 000000000000..816488c0b97e
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -0,0 +1,970 @@
1# x86 Opcode Maps
2#
3# This is (mostly) based on the following documentation.
4# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
5# (#326018-047US, June 2013)
6#
7#<Opcode maps>
8# Table: table-name
9# Referrer: escaped-name
10# AVXcode: avx-code
11# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
12# (or)
13# opcode: escape # escaped-name
14# EndTable
15#
16#<group maps>
17# GrpTable: GrpXXX
18# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
19# EndTable
20#
21# AVX Superscripts
22# (v): this opcode requires VEX prefix.
23# (v1): this opcode only supports 128bit VEX.
24#
25# Last Prefix Superscripts
26# - (66): the last prefix is 0x66
27# - (F3): the last prefix is 0xF3
28# - (F2): the last prefix is 0xF2
29# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
30# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
31
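# For example, the one-byte table entry "e8: CALL Jz (f64)" below reads as:
# opcode byte 0xe8, mnemonic CALL, one operand Jz (an immediate-relative
# jump offset of operand size), with the (f64) superscript meaning the
# operand size is forced to 64-bit in long mode (see the "forced64" notes
# further down).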
32Table: one byte opcode
33Referrer:
34AVXcode:
35# 0x00 - 0x0f
3600: ADD Eb,Gb
3701: ADD Ev,Gv
3802: ADD Gb,Eb
3903: ADD Gv,Ev
4004: ADD AL,Ib
4105: ADD rAX,Iz
4206: PUSH ES (i64)
4307: POP ES (i64)
4408: OR Eb,Gb
4509: OR Ev,Gv
460a: OR Gb,Eb
470b: OR Gv,Ev
480c: OR AL,Ib
490d: OR rAX,Iz
500e: PUSH CS (i64)
510f: escape # 2-byte escape
52# 0x10 - 0x1f
5310: ADC Eb,Gb
5411: ADC Ev,Gv
5512: ADC Gb,Eb
5613: ADC Gv,Ev
5714: ADC AL,Ib
5815: ADC rAX,Iz
5916: PUSH SS (i64)
6017: POP SS (i64)
6118: SBB Eb,Gb
6219: SBB Ev,Gv
631a: SBB Gb,Eb
641b: SBB Gv,Ev
651c: SBB AL,Ib
661d: SBB rAX,Iz
671e: PUSH DS (i64)
681f: POP DS (i64)
69# 0x20 - 0x2f
7020: AND Eb,Gb
7121: AND Ev,Gv
7222: AND Gb,Eb
7323: AND Gv,Ev
7424: AND AL,Ib
7525: AND rAx,Iz
7626: SEG=ES (Prefix)
7727: DAA (i64)
7828: SUB Eb,Gb
7929: SUB Ev,Gv
802a: SUB Gb,Eb
812b: SUB Gv,Ev
822c: SUB AL,Ib
832d: SUB rAX,Iz
842e: SEG=CS (Prefix)
852f: DAS (i64)
86# 0x30 - 0x3f
8730: XOR Eb,Gb
8831: XOR Ev,Gv
8932: XOR Gb,Eb
9033: XOR Gv,Ev
9134: XOR AL,Ib
9235: XOR rAX,Iz
9336: SEG=SS (Prefix)
9437: AAA (i64)
9538: CMP Eb,Gb
9639: CMP Ev,Gv
973a: CMP Gb,Eb
983b: CMP Gv,Ev
993c: CMP AL,Ib
1003d: CMP rAX,Iz
1013e: SEG=DS (Prefix)
1023f: AAS (i64)
103# 0x40 - 0x4f
10440: INC eAX (i64) | REX (o64)
10541: INC eCX (i64) | REX.B (o64)
10642: INC eDX (i64) | REX.X (o64)
10743: INC eBX (i64) | REX.XB (o64)
10844: INC eSP (i64) | REX.R (o64)
10945: INC eBP (i64) | REX.RB (o64)
11046: INC eSI (i64) | REX.RX (o64)
11147: INC eDI (i64) | REX.RXB (o64)
11248: DEC eAX (i64) | REX.W (o64)
11349: DEC eCX (i64) | REX.WB (o64)
1144a: DEC eDX (i64) | REX.WX (o64)
1154b: DEC eBX (i64) | REX.WXB (o64)
1164c: DEC eSP (i64) | REX.WR (o64)
1174d: DEC eBP (i64) | REX.WRB (o64)
1184e: DEC eSI (i64) | REX.WRX (o64)
1194f: DEC eDI (i64) | REX.WRXB (o64)
120# 0x50 - 0x5f
12150: PUSH rAX/r8 (d64)
12251: PUSH rCX/r9 (d64)
12352: PUSH rDX/r10 (d64)
12453: PUSH rBX/r11 (d64)
12554: PUSH rSP/r12 (d64)
12655: PUSH rBP/r13 (d64)
12756: PUSH rSI/r14 (d64)
12857: PUSH rDI/r15 (d64)
12958: POP rAX/r8 (d64)
13059: POP rCX/r9 (d64)
1315a: POP rDX/r10 (d64)
1325b: POP rBX/r11 (d64)
1335c: POP rSP/r12 (d64)
1345d: POP rBP/r13 (d64)
1355e: POP rSI/r14 (d64)
1365f: POP rDI/r15 (d64)
137# 0x60 - 0x6f
13860: PUSHA/PUSHAD (i64)
13961: POPA/POPAD (i64)
14062: BOUND Gv,Ma (i64)
14163: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
14264: SEG=FS (Prefix)
14365: SEG=GS (Prefix)
14466: Operand-Size (Prefix)
14567: Address-Size (Prefix)
14668: PUSH Iz (d64)
14769: IMUL Gv,Ev,Iz
1486a: PUSH Ib (d64)
1496b: IMUL Gv,Ev,Ib
1506c: INS/INSB Yb,DX
1516d: INS/INSW/INSD Yz,DX
1526e: OUTS/OUTSB DX,Xb
1536f: OUTS/OUTSW/OUTSD DX,Xz
154# 0x70 - 0x7f
15570: JO Jb
15671: JNO Jb
15772: JB/JNAE/JC Jb
15873: JNB/JAE/JNC Jb
15974: JZ/JE Jb
16075: JNZ/JNE Jb
16176: JBE/JNA Jb
16277: JNBE/JA Jb
16378: JS Jb
16479: JNS Jb
1657a: JP/JPE Jb
1667b: JNP/JPO Jb
1677c: JL/JNGE Jb
1687d: JNL/JGE Jb
1697e: JLE/JNG Jb
1707f: JNLE/JG Jb
171# 0x80 - 0x8f
17280: Grp1 Eb,Ib (1A)
17381: Grp1 Ev,Iz (1A)
17482: Grp1 Eb,Ib (1A),(i64)
17583: Grp1 Ev,Ib (1A)
17684: TEST Eb,Gb
17785: TEST Ev,Gv
17886: XCHG Eb,Gb
17987: XCHG Ev,Gv
18088: MOV Eb,Gb
18189: MOV Ev,Gv
1828a: MOV Gb,Eb
1838b: MOV Gv,Ev
1848c: MOV Ev,Sw
1858d: LEA Gv,M
1868e: MOV Sw,Ew
1878f: Grp1A (1A) | POP Ev (d64)
188# 0x90 - 0x9f
18990: NOP | PAUSE (F3) | XCHG r8,rAX
19091: XCHG rCX/r9,rAX
19192: XCHG rDX/r10,rAX
19293: XCHG rBX/r11,rAX
19394: XCHG rSP/r12,rAX
19495: XCHG rBP/r13,rAX
19596: XCHG rSI/r14,rAX
19697: XCHG rDI/r15,rAX
19798: CBW/CWDE/CDQE
19899: CWD/CDQ/CQO
1999a: CALLF Ap (i64)
2009b: FWAIT/WAIT
2019c: PUSHF/D/Q Fv (d64)
2029d: POPF/D/Q Fv (d64)
2039e: SAHF
2049f: LAHF
205# 0xa0 - 0xaf
206a0: MOV AL,Ob
207a1: MOV rAX,Ov
208a2: MOV Ob,AL
209a3: MOV Ov,rAX
210a4: MOVS/B Yb,Xb
211a5: MOVS/W/D/Q Yv,Xv
212a6: CMPS/B Xb,Yb
213a7: CMPS/W/D Xv,Yv
214a8: TEST AL,Ib
215a9: TEST rAX,Iz
216aa: STOS/B Yb,AL
217ab: STOS/W/D/Q Yv,rAX
218ac: LODS/B AL,Xb
219ad: LODS/W/D/Q rAX,Xv
220ae: SCAS/B AL,Yb
221# Note: The May 2011 Intel manual shows Xv for the second parameter of the
222# next instruction but Yv is correct
223af: SCAS/W/D/Q rAX,Yv
224# 0xb0 - 0xbf
225b0: MOV AL/R8L,Ib
226b1: MOV CL/R9L,Ib
227b2: MOV DL/R10L,Ib
228b3: MOV BL/R11L,Ib
229b4: MOV AH/R12L,Ib
230b5: MOV CH/R13L,Ib
231b6: MOV DH/R14L,Ib
232b7: MOV BH/R15L,Ib
233b8: MOV rAX/r8,Iv
234b9: MOV rCX/r9,Iv
235ba: MOV rDX/r10,Iv
236bb: MOV rBX/r11,Iv
237bc: MOV rSP/r12,Iv
238bd: MOV rBP/r13,Iv
239be: MOV rSI/r14,Iv
240bf: MOV rDI/r15,Iv
241# 0xc0 - 0xcf
242c0: Grp2 Eb,Ib (1A)
243c1: Grp2 Ev,Ib (1A)
244c2: RETN Iw (f64)
245c3: RETN
246c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
247c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
248c6: Grp11A Eb,Ib (1A)
249c7: Grp11B Ev,Iz (1A)
250c8: ENTER Iw,Ib
251c9: LEAVE (d64)
252ca: RETF Iw
253cb: RETF
254cc: INT3
255cd: INT Ib
256ce: INTO (i64)
257cf: IRET/D/Q
258# 0xd0 - 0xdf
259d0: Grp2 Eb,1 (1A)
260d1: Grp2 Ev,1 (1A)
261d2: Grp2 Eb,CL (1A)
262d3: Grp2 Ev,CL (1A)
263d4: AAM Ib (i64)
264d5: AAD Ib (i64)
265d6:
266d7: XLAT/XLATB
267d8: ESC
268d9: ESC
269da: ESC
270db: ESC
271dc: ESC
272dd: ESC
273de: ESC
274df: ESC
275# 0xe0 - 0xef
276# Note: "forced64" is Intel CPU behavior: Intel CPUs ignore the 0x66 prefix
277# in 64-bit mode. AMD CPUs accept the 0x66 prefix; there it causes RIP
278# truncation to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
279e0: LOOPNE/LOOPNZ Jb (f64)
280e1: LOOPE/LOOPZ Jb (f64)
281e2: LOOP Jb (f64)
282e3: JrCXZ Jb (f64)
283e4: IN AL,Ib
284e5: IN eAX,Ib
285e6: OUT Ib,AL
286e7: OUT Ib,eAX
287# With a 0x66 prefix in 64-bit mode, the immediate offset in "near" jumps
288# and calls is 16-bit on AMD CPUs. For CALL, the push of the return address
289# is 16-bit wide and RSP is decremented by 2, but RSP, unlike RIP, is not
290# truncated to 16 bits.
291e8: CALL Jz (f64)
292e9: JMP-near Jz (f64)
293ea: JMP-far Ap (i64)
294eb: JMP-short Jb (f64)
295ec: IN AL,DX
296ed: IN eAX,DX
297ee: OUT DX,AL
298ef: OUT DX,eAX
299# 0xf0 - 0xff
300f0: LOCK (Prefix)
301f1:
302f2: REPNE (Prefix) | XACQUIRE (Prefix)
303f3: REP/REPE (Prefix) | XRELEASE (Prefix)
304f4: HLT
305f5: CMC
306f6: Grp3_1 Eb (1A)
307f7: Grp3_2 Ev (1A)
308f8: CLC
309f9: STC
310fa: CLI
311fb: STI
312fc: CLD
313fd: STD
314fe: Grp4 (1A)
315ff: Grp5 (1A)
316EndTable
317
318Table: 2-byte opcode (0x0f)
319Referrer: 2-byte escape
320AVXcode: 1
321# 0x0f 0x00-0x0f
32200: Grp6 (1A)
32301: Grp7 (1A)
32402: LAR Gv,Ew
32503: LSL Gv,Ew
32604:
32705: SYSCALL (o64)
32806: CLTS
32907: SYSRET (o64)
33008: INVD
33109: WBINVD
3320a:
3330b: UD2 (1B)
3340c:
335# AMD's prefetch group. Intel supports prefetchw(/1) only.
3360d: GrpP
3370e: FEMMS
338# 3DNow! uses the last imm byte as opcode extension.
3390f: 3DNow! Pq,Qq,Ib
340# 0x0f 0x10-0x1f
341# NOTE: According to the Intel SDM opcode map, vmovups and vmovupd have no
342# operands, but they actually do. Also, vmovss and vmovsd only accept 128-bit
343# operands. MOVSS/MOVSD have too many forms (3) in the SDM; this map just
344# shows a typical form. Many AVX instructions lack the v1 superscript,
345# according to Intel AVX-Programming Reference A.1.
34610: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
34711: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
34812: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
34913: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
35014: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
35115: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
35216: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
35317: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
35418: Grp16 (1A)
35519:
3561a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv
3571b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,Gv,Gv
3581c:
3591d:
3601e:
3611f: NOP Ev
362# 0x0f 0x20-0x2f
36320: MOV Rd,Cd
36421: MOV Rd,Dd
36522: MOV Cd,Rd
36623: MOV Dd,Rd
36724:
36825:
36926:
37027:
37128: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
37229: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
3732a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
3742b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
3752c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
3762d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
3772e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
3782f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
379# 0x0f 0x30-0x3f
38030: WRMSR
38131: RDTSC
38232: RDMSR
38333: RDPMC
38434: SYSENTER
38535: SYSEXIT
38636:
38737: GETSEC
38838: escape # 3-byte escape 1
38939:
3903a: escape # 3-byte escape 2
3913b:
3923c:
3933d:
3943e:
3953f:
396# 0x0f 0x40-0x4f
39740: CMOVO Gv,Ev
39841: CMOVNO Gv,Ev
39942: CMOVB/C/NAE Gv,Ev
40043: CMOVAE/NB/NC Gv,Ev
40144: CMOVE/Z Gv,Ev
40245: CMOVNE/NZ Gv,Ev
40346: CMOVBE/NA Gv,Ev
40447: CMOVA/NBE Gv,Ev
40548: CMOVS Gv,Ev
40649: CMOVNS Gv,Ev
4074a: CMOVP/PE Gv,Ev
4084b: CMOVNP/PO Gv,Ev
4094c: CMOVL/NGE Gv,Ev
4104d: CMOVNL/GE Gv,Ev
4114e: CMOVLE/NG Gv,Ev
4124f: CMOVNLE/G Gv,Ev
413# 0x0f 0x50-0x5f
41450: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
41551: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
41652: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
41753: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
41854: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
41955: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
42056: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
42157: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
42258: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
42359: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
4245a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
4255b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
4265c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
4275d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
4285e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
4295f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
430# 0x0f 0x60-0x6f
43160: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
43261: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
43362: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
43463: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
43564: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
43665: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
43766: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
43867: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
43968: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
44069: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
4416a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
4426b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
4436c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
4446d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
4456e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
4466f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
447# 0x0f 0x70-0x7f
44870: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
44971: Grp12 (1A)
45072: Grp13 (1A)
45173: Grp14 (1A)
45274: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
45375: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
45476: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
455# Note: Remove (v), because vzeroall and vzeroupper become emms without VEX.
45677: emms | vzeroupper | vzeroall
45778: VMREAD Ey,Gy
45879: VMWRITE Gy,Ey
4597a:
4607b:
4617c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
4627d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
4637e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
4647f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
465# 0x0f 0x80-0x8f
466# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
46780: JO Jz (f64)
46881: JNO Jz (f64)
46982: JB/JC/JNAE Jz (f64)
47083: JAE/JNB/JNC Jz (f64)
47184: JE/JZ Jz (f64)
47285: JNE/JNZ Jz (f64)
47386: JBE/JNA Jz (f64)
47487: JA/JNBE Jz (f64)
47588: JS Jz (f64)
47689: JNS Jz (f64)
4778a: JP/JPE Jz (f64)
4788b: JNP/JPO Jz (f64)
4798c: JL/JNGE Jz (f64)
4808d: JNL/JGE Jz (f64)
4818e: JLE/JNG Jz (f64)
4828f: JNLE/JG Jz (f64)
483# 0x0f 0x90-0x9f
48490: SETO Eb
48591: SETNO Eb
48692: SETB/C/NAE Eb
48793: SETAE/NB/NC Eb
48894: SETE/Z Eb
48995: SETNE/NZ Eb
49096: SETBE/NA Eb
49197: SETA/NBE Eb
49298: SETS Eb
49399: SETNS Eb
4949a: SETP/PE Eb
4959b: SETNP/PO Eb
4969c: SETL/NGE Eb
4979d: SETNL/GE Eb
4989e: SETLE/NG Eb
4999f: SETNLE/G Eb
500# 0x0f 0xa0-0xaf
501a0: PUSH FS (d64)
502a1: POP FS (d64)
503a2: CPUID
504a3: BT Ev,Gv
505a4: SHLD Ev,Gv,Ib
506a5: SHLD Ev,Gv,CL
507a6: GrpPDLK
508a7: GrpRNG
509a8: PUSH GS (d64)
510a9: POP GS (d64)
511aa: RSM
512ab: BTS Ev,Gv
513ac: SHRD Ev,Gv,Ib
514ad: SHRD Ev,Gv,CL
515ae: Grp15 (1A),(1C)
516af: IMUL Gv,Ev
517# 0x0f 0xb0-0xbf
518b0: CMPXCHG Eb,Gb
519b1: CMPXCHG Ev,Gv
520b2: LSS Gv,Mp
521b3: BTR Ev,Gv
522b4: LFS Gv,Mp
523b5: LGS Gv,Mp
524b6: MOVZX Gv,Eb
525b7: MOVZX Gv,Ew
526b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
527b9: Grp10 (1A)
528ba: Grp8 Ev,Ib (1A)
529bb: BTC Ev,Gv
530bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
531bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
532be: MOVSX Gv,Eb
533bf: MOVSX Gv,Ew
534# 0x0f 0xc0-0xcf
535c0: XADD Eb,Gb
536c1: XADD Ev,Gv
537c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
538c3: movnti My,Gy
539c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
540c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
541c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
542c7: Grp9 (1A)
543c8: BSWAP RAX/EAX/R8/R8D
544c9: BSWAP RCX/ECX/R9/R9D
545ca: BSWAP RDX/EDX/R10/R10D
546cb: BSWAP RBX/EBX/R11/R11D
547cc: BSWAP RSP/ESP/R12/R12D
548cd: BSWAP RBP/EBP/R13/R13D
549ce: BSWAP RSI/ESI/R14/R14D
550cf: BSWAP RDI/EDI/R15/R15D
551# 0x0f 0xd0-0xdf
552d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
553d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
554d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
555d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
556d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
557d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
558d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
559d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
560d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
561d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
562da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
563db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
564dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
565dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
566de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
567df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
568# 0x0f 0xe0-0xef
569e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
570e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
571e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
572e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
573e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
574e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
575e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
576e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
577e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
578e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
579ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
580eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
581ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
582ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
583ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
584ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
585# 0x0f 0xf0-0xff
586f0: vlddqu Vx,Mx (F2)
587f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
588f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
589f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
590f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
591f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
592f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
593f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
594f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
595f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
596fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
597fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
598fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
599fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
600fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
601ff:
602EndTable
603
604Table: 3-byte opcode 1 (0x0f 0x38)
605Referrer: 3-byte escape 1
606AVXcode: 2
607# 0x0f 0x38 0x00-0x0f
60800: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
60901: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
61002: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
61103: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
61204: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
61305: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
61406: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
61507: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
61608: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
61709: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
6180a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
6190b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
6200c: vpermilps Vx,Hx,Wx (66),(v)
6210d: vpermilpd Vx,Hx,Wx (66),(v)
6220e: vtestps Vx,Wx (66),(v)
6230f: vtestpd Vx,Wx (66),(v)
624# 0x0f 0x38 0x10-0x1f
62510: pblendvb Vdq,Wdq (66)
62611:
62712:
62813: vcvtph2ps Vx,Wx,Ib (66),(v)
62914: blendvps Vdq,Wdq (66)
63015: blendvpd Vdq,Wdq (66)
63116: vpermps Vqq,Hqq,Wqq (66),(v)
63217: vptest Vx,Wx (66)
63318: vbroadcastss Vx,Wd (66),(v)
63419: vbroadcastsd Vqq,Wq (66),(v)
6351a: vbroadcastf128 Vqq,Mdq (66),(v)
6361b:
6371c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
6381d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
6391e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
6401f:
641# 0x0f 0x38 0x20-0x2f
64220: vpmovsxbw Vx,Ux/Mq (66),(v1)
64321: vpmovsxbd Vx,Ux/Md (66),(v1)
64422: vpmovsxbq Vx,Ux/Mw (66),(v1)
64523: vpmovsxwd Vx,Ux/Mq (66),(v1)
64624: vpmovsxwq Vx,Ux/Md (66),(v1)
64725: vpmovsxdq Vx,Ux/Mq (66),(v1)
64826:
64927:
65028: vpmuldq Vx,Hx,Wx (66),(v1)
65129: vpcmpeqq Vx,Hx,Wx (66),(v1)
6522a: vmovntdqa Vx,Mx (66),(v1)
6532b: vpackusdw Vx,Hx,Wx (66),(v1)
6542c: vmaskmovps Vx,Hx,Mx (66),(v)
6552d: vmaskmovpd Vx,Hx,Mx (66),(v)
6562e: vmaskmovps Mx,Hx,Vx (66),(v)
6572f: vmaskmovpd Mx,Hx,Vx (66),(v)
658# 0x0f 0x38 0x30-0x3f
65930: vpmovzxbw Vx,Ux/Mq (66),(v1)
66031: vpmovzxbd Vx,Ux/Md (66),(v1)
66132: vpmovzxbq Vx,Ux/Mw (66),(v1)
66233: vpmovzxwd Vx,Ux/Mq (66),(v1)
66334: vpmovzxwq Vx,Ux/Md (66),(v1)
66435: vpmovzxdq Vx,Ux/Mq (66),(v1)
66536: vpermd Vqq,Hqq,Wqq (66),(v)
66637: vpcmpgtq Vx,Hx,Wx (66),(v1)
66738: vpminsb Vx,Hx,Wx (66),(v1)
66839: vpminsd Vx,Hx,Wx (66),(v1)
6693a: vpminuw Vx,Hx,Wx (66),(v1)
6703b: vpminud Vx,Hx,Wx (66),(v1)
6713c: vpmaxsb Vx,Hx,Wx (66),(v1)
6723d: vpmaxsd Vx,Hx,Wx (66),(v1)
6733e: vpmaxuw Vx,Hx,Wx (66),(v1)
6743f: vpmaxud Vx,Hx,Wx (66),(v1)
675# 0x0f 0x38 0x40-0x8f
67640: vpmulld Vx,Hx,Wx (66),(v1)
67741: vphminposuw Vdq,Wdq (66),(v1)
67842:
67943:
68044:
68145: vpsrlvd/q Vx,Hx,Wx (66),(v)
68246: vpsravd Vx,Hx,Wx (66),(v)
68347: vpsllvd/q Vx,Hx,Wx (66),(v)
684# Skip 0x48-0x57
68558: vpbroadcastd Vx,Wx (66),(v)
68659: vpbroadcastq Vx,Wx (66),(v)
6875a: vbroadcasti128 Vqq,Mdq (66),(v)
688# Skip 0x5b-0x77
68978: vpbroadcastb Vx,Wx (66),(v)
69079: vpbroadcastw Vx,Wx (66),(v)
691# Skip 0x7a-0x7f
69280: INVEPT Gy,Mdq (66)
69381: INVVPID Gy,Mdq (66)
69482: INVPCID Gy,Mdq (66)
6958c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
6968e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
697# 0x0f 0x38 0x90-0xbf (FMA)
69890: vgatherdd/q Vx,Hx,Wx (66),(v)
69991: vgatherqd/q Vx,Hx,Wx (66),(v)
70092: vgatherdps/d Vx,Hx,Wx (66),(v)
70193: vgatherqps/d Vx,Hx,Wx (66),(v)
70294:
70395:
70496: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
70597: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
70698: vfmadd132ps/d Vx,Hx,Wx (66),(v)
70799: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
7089a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
7099b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
7109c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
7119d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
7129e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
7139f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
714a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
715a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
716a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
717a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
718aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
719ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
720ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
721ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
722ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
723af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
724b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
725b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
726b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
727b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
728ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
729bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
730bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
731bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
732be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
733bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
734# 0x0f 0x38 0xc0-0xff
735db: VAESIMC Vdq,Wdq (66),(v1)
736dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
737dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
738de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
739df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
740f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
741f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
742f2: ANDN Gy,By,Ey (v)
743f3: Grp17 (1A)
744f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
745f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
746f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
747EndTable
748
749Table: 3-byte opcode 2 (0x0f 0x3a)
750Referrer: 3-byte escape 2
751AVXcode: 3
752# 0x0f 0x3a 0x00-0xff
75300: vpermq Vqq,Wqq,Ib (66),(v)
75401: vpermpd Vqq,Wqq,Ib (66),(v)
75502: vpblendd Vx,Hx,Wx,Ib (66),(v)
75603:
75704: vpermilps Vx,Wx,Ib (66),(v)
75805: vpermilpd Vx,Wx,Ib (66),(v)
75906: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
76007:
76108: vroundps Vx,Wx,Ib (66)
76209: vroundpd Vx,Wx,Ib (66)
7630a: vroundss Vss,Wss,Ib (66),(v1)
7640b: vroundsd Vsd,Wsd,Ib (66),(v1)
7650c: vblendps Vx,Hx,Wx,Ib (66)
7660d: vblendpd Vx,Hx,Wx,Ib (66)
7670e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
7680f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
76914: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
77015: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
77116: vpextrd/q Ey,Vdq,Ib (66),(v1)
77217: vextractps Ed,Vdq,Ib (66),(v1)
77318: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
77419: vextractf128 Wdq,Vqq,Ib (66),(v)
7751d: vcvtps2ph Wx,Vx,Ib (66),(v)
77620: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
77721: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
77822: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
77938: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
78039: vextracti128 Wdq,Vqq,Ib (66),(v)
78140: vdpps Vx,Hx,Wx,Ib (66)
78241: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
78342: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
78444: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
78546: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
7864a: vblendvps Vx,Hx,Wx,Lx (66),(v)
7874b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
7884c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
78960: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
79061: vpcmpestri Vdq,Wdq,Ib (66),(v1)
79162: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
79263: vpcmpistri Vdq,Wdq,Ib (66),(v1)
793df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
794f0: RORX Gy,Ey,Ib (F2),(v)
795EndTable
796
797GrpTable: Grp1
7980: ADD
7991: OR
8002: ADC
8013: SBB
8024: AND
8035: SUB
8046: XOR
8057: CMP
806EndTable
807
808GrpTable: Grp1A
8090: POP
810EndTable
811
812GrpTable: Grp2
8130: ROL
8141: ROR
8152: RCL
8163: RCR
8174: SHL/SAL
8185: SHR
8196:
8207: SAR
821EndTable
822
823GrpTable: Grp3_1
8240: TEST Eb,Ib
8251:
8262: NOT Eb
8273: NEG Eb
8284: MUL AL,Eb
8295: IMUL AL,Eb
8306: DIV AL,Eb
8317: IDIV AL,Eb
832EndTable
833
834GrpTable: Grp3_2
8350: TEST Ev,Iz
8361:
8372: NOT Ev
8383: NEG Ev
8394: MUL rAX,Ev
8405: IMUL rAX,Ev
8416: DIV rAX,Ev
8427: IDIV rAX,Ev
843EndTable
844
845GrpTable: Grp4
8460: INC Eb
8471: DEC Eb
848EndTable
849
850GrpTable: Grp5
8510: INC Ev
8521: DEC Ev
853# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
8542: CALLN Ev (f64)
8553: CALLF Ep
8564: JMPN Ev (f64)
8575: JMPF Mp
8586: PUSH Ev (d64)
8597:
860EndTable
861
862GrpTable: Grp6
8630: SLDT Rv/Mw
8641: STR Rv/Mw
8652: LLDT Ew
8663: LTR Ew
8674: VERR Ew
8685: VERW Ew
869EndTable
870
871GrpTable: Grp7
8720: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
8731: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
8742: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101),(11B) | XTEST (110),(11B)
8753: LIDT Ms
8764: SMSW Mw/Rv
8775:
8786: LMSW Ew
8797: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
880EndTable
881
882GrpTable: Grp8
8834: BT
8845: BTS
8856: BTR
8867: BTC
887EndTable
888
889GrpTable: Grp9
8901: CMPXCHG8B/16B Mq/Mdq
8916: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
8927: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
893EndTable
894
895GrpTable: Grp10
896EndTable
897
898# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
899GrpTable: Grp11A
9000: MOV Eb,Ib
9017: XABORT Ib (000),(11B)
902EndTable
903
904GrpTable: Grp11B
9050: MOV Eb,Iz
9067: XBEGIN Jz (000),(11B)
907EndTable
908
909GrpTable: Grp12
9102: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
9114: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
9126: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
913EndTable
914
915GrpTable: Grp13
9162: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
9174: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
9186: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
919EndTable
920
921GrpTable: Grp14
9222: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
9233: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
9246: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
9257: vpslldq Hx,Ux,Ib (66),(11B),(v1)
926EndTable
927
928GrpTable: Grp15
9290: fxsave | RDFSBASE Ry (F3),(11B)
9301: fxrstor | RDGSBASE Ry (F3),(11B)
9312: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
9323: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
9334: XSAVE
9345: XRSTOR | lfence (11B)
9356: XSAVEOPT | mfence (11B)
9367: clflush | sfence (11B)
937EndTable
938
939GrpTable: Grp16
9400: prefetch NTA
9411: prefetch T0
9422: prefetch T1
9433: prefetch T2
944EndTable
945
946GrpTable: Grp17
9471: BLSR By,Ey (v)
9482: BLSMSK By,Ey (v)
9493: BLSI By,Ey (v)
950EndTable
951
952# AMD's Prefetch Group
953GrpTable: GrpP
9540: PREFETCH
9551: PREFETCHW
956EndTable
957
958GrpTable: GrpPDLK
9590: MONTMUL
9601: XSHA1
9612: XSHA2
962EndTable
963
964GrpTable: GrpRNG
9650: xstore-rng
9661: xcrypt-ecb
9672: xcrypt-cbc
9684: xcrypt-cfb
9695: xcrypt-ofb
970EndTable
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
new file mode 100644
index 000000000000..bb41c20e6005
--- /dev/null
+++ b/tools/perf/util/intel-pt.c
@@ -0,0 +1,1956 @@
1/*
2 * intel_pt.c: Intel Processor Trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <stdio.h>
17#include <stdbool.h>
18#include <errno.h>
19#include <linux/kernel.h>
20#include <linux/types.h>
21
22#include "../perf.h"
23#include "session.h"
24#include "machine.h"
25#include "tool.h"
26#include "event.h"
27#include "evlist.h"
28#include "evsel.h"
29#include "map.h"
30#include "color.h"
31#include "util.h"
32#include "thread.h"
33#include "thread-stack.h"
34#include "symbol.h"
35#include "callchain.h"
36#include "dso.h"
37#include "debug.h"
38#include "auxtrace.h"
39#include "tsc.h"
40#include "intel-pt.h"
41
42#include "intel-pt-decoder/intel-pt-log.h"
43#include "intel-pt-decoder/intel-pt-decoder.h"
44#include "intel-pt-decoder/intel-pt-insn-decoder.h"
45#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
46
47#define MAX_TIMESTAMP (~0ULL)
48
49struct intel_pt {
50 struct auxtrace auxtrace;
51 struct auxtrace_queues queues;
52 struct auxtrace_heap heap;
53 u32 auxtrace_type;
54 struct perf_session *session;
55 struct machine *machine;
56 struct perf_evsel *switch_evsel;
57 struct thread *unknown_thread;
58 bool timeless_decoding;
59 bool sampling_mode;
60 bool snapshot_mode;
61 bool per_cpu_mmaps;
62 bool have_tsc;
63 bool data_queued;
64 bool est_tsc;
65 bool sync_switch;
66 int have_sched_switch;
67 u32 pmu_type;
68 u64 kernel_start;
69 u64 switch_ip;
70 u64 ptss_ip;
71
72 struct perf_tsc_conversion tc;
73 bool cap_user_time_zero;
74
75 struct itrace_synth_opts synth_opts;
76
77 bool sample_instructions;
78 u64 instructions_sample_type;
79 u64 instructions_sample_period;
80 u64 instructions_id;
81
82 bool sample_branches;
83 u32 branches_filter;
84 u64 branches_sample_type;
85 u64 branches_id;
86
87 bool sample_transactions;
88 u64 transactions_sample_type;
89 u64 transactions_id;
90
91 bool synth_needs_swap;
92
93 u64 tsc_bit;
94 u64 mtc_bit;
95 u64 mtc_freq_bits;
96 u32 tsc_ctc_ratio_n;
97 u32 tsc_ctc_ratio_d;
98 u64 cyc_bit;
99 u64 noretcomp_bit;
100 unsigned max_non_turbo_ratio;
101};
102
103enum switch_state {
104 INTEL_PT_SS_NOT_TRACING,
105 INTEL_PT_SS_UNKNOWN,
106 INTEL_PT_SS_TRACING,
107 INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
108 INTEL_PT_SS_EXPECTING_SWITCH_IP,
109};
110
111struct intel_pt_queue {
112 struct intel_pt *pt;
113 unsigned int queue_nr;
114 struct auxtrace_buffer *buffer;
115 void *decoder;
116 const struct intel_pt_state *state;
117 struct ip_callchain *chain;
118 union perf_event *event_buf;
119 bool on_heap;
120 bool stop;
121 bool step_through_buffers;
122 bool use_buffer_pid_tid;
123 pid_t pid, tid;
124 int cpu;
125 int switch_state;
126 pid_t next_tid;
127 struct thread *thread;
128 bool exclude_kernel;
129 bool have_sample;
130 u64 time;
131 u64 timestamp;
132 u32 flags;
133 u16 insn_len;
134 u64 last_insn_cnt;
135};
136
137static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
138 unsigned char *buf, size_t len)
139{
140 struct intel_pt_pkt packet;
141 size_t pos = 0;
142 int ret, pkt_len, i;
143 char desc[INTEL_PT_PKT_DESC_MAX];
144 const char *color = PERF_COLOR_BLUE;
145
146 color_fprintf(stdout, color,
147 ". ... Intel Processor Trace data: size %zu bytes\n",
148 len);
149
150 while (len) {
151 ret = intel_pt_get_packet(buf, len, &packet);
152 if (ret > 0)
153 pkt_len = ret;
154 else
155 pkt_len = 1;
156 printf(".");
157 color_fprintf(stdout, color, " %08x: ", pos);
158 for (i = 0; i < pkt_len; i++)
159 color_fprintf(stdout, color, " %02x", buf[i]);
160 for (; i < 16; i++)
161 color_fprintf(stdout, color, " ");
162 if (ret > 0) {
163 ret = intel_pt_pkt_desc(&packet, desc,
164 INTEL_PT_PKT_DESC_MAX);
165 if (ret > 0)
166 color_fprintf(stdout, color, " %s\n", desc);
167 } else {
168 color_fprintf(stdout, color, " Bad packet!\n");
169 }
170 pos += pkt_len;
171 buf += pkt_len;
172 len -= pkt_len;
173 }
174}
175
176static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
177 size_t len)
178{
179 printf(".\n");
180 intel_pt_dump(pt, buf, len);
181}
182
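/*
 * Successive snapshot buffers can overlap: trim buffer 'b' so that it
 * starts at the first byte not already covered by buffer 'a'. The
 * trimmed view is published via use_data/use_size, which
 * intel_pt_get_trace() below prefers over data/size.
 */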
183static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
184 struct auxtrace_buffer *b)
185{
186 void *start;
187
188 start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
189 pt->have_tsc);
190 if (!start)
191 return -EINVAL;
192 b->use_size = b->data + b->size - start;
193 b->use_data = start;
194 return 0;
195}
196
197static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
198 struct auxtrace_queue *queue,
199 struct auxtrace_buffer *buffer)
200{
201 if (queue->cpu == -1 && buffer->cpu != -1)
202 ptq->cpu = buffer->cpu;
203
204 ptq->pid = buffer->pid;
205 ptq->tid = buffer->tid;
206
207 intel_pt_log("queue %u cpu %d pid %d tid %d\n",
208 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
209
210 thread__zput(ptq->thread);
211
212 if (ptq->tid != -1) {
213 if (ptq->pid != -1)
214 ptq->thread = machine__findnew_thread(ptq->pt->machine,
215 ptq->pid,
216 ptq->tid);
217 else
218 ptq->thread = machine__find_thread(ptq->pt->machine, -1,
219 ptq->tid);
220 }
221}
222
223/* This function assumes data is processed sequentially only */
224static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
225{
226 struct intel_pt_queue *ptq = data;
227 struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
228 struct auxtrace_queue *queue;
229
230 if (ptq->stop) {
231 b->len = 0;
232 return 0;
233 }
234
235 queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
236
237 buffer = auxtrace_buffer__next(queue, buffer);
238 if (!buffer) {
239 if (old_buffer)
240 auxtrace_buffer__drop_data(old_buffer);
241 b->len = 0;
242 return 0;
243 }
244
245 ptq->buffer = buffer;
246
247 if (!buffer->data) {
248 int fd = perf_data_file__fd(ptq->pt->session->file);
249
250 buffer->data = auxtrace_buffer__get_data(buffer, fd);
251 if (!buffer->data)
252 return -ENOMEM;
253 }
254
255 if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
256 intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
257 return -ENOMEM;
258
259 if (old_buffer)
260 auxtrace_buffer__drop_data(old_buffer);
261
262 if (buffer->use_data) {
263 b->len = buffer->use_size;
264 b->buf = buffer->use_data;
265 } else {
266 b->len = buffer->size;
267 b->buf = buffer->data;
268 }
269 b->ref_timestamp = buffer->reference;
270
271 if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
272 !buffer->consecutive)) {
273 b->consecutive = false;
274 b->trace_nr = buffer->buffer_nr + 1;
275 } else {
276 b->consecutive = true;
277 }
278
279 if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
280 ptq->tid != buffer->tid))
281 intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
282
283 if (ptq->step_through_buffers)
284 ptq->stop = true;
285
286 if (!b->len)
287 return intel_pt_get_trace(b, data);
288
289 return 0;
290}
291
292struct intel_pt_cache_entry {
293 struct auxtrace_cache_entry entry;
294 u64 insn_cnt;
295 u64 byte_cnt;
296 enum intel_pt_insn_op op;
297 enum intel_pt_insn_branch branch;
298 int length;
299 int32_t rel;
300};
301
302static int intel_pt_config_div(const char *var, const char *value, void *data)
303{
304 int *d = data;
305 long val;
306
307 if (!strcmp(var, "intel-pt.cache-divisor")) {
308 val = strtol(value, NULL, 0);
309 if (val > 0 && val <= INT_MAX)
310 *d = val;
311 }
312
313 return 0;
314}
315
316static int intel_pt_cache_divisor(void)
317{
318 static int d;
319
320 if (d)
321 return d;
322
323 perf_config(intel_pt_config_div, &d);
324
325 if (!d)
326 d = 64;
327
328 return d;
329}
330
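/*
 * Size the per-dso instruction cache in hash bits: roughly
 * log2(dso file size / divisor), clamped to about the 2^10..2^21
 * range.
 */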
331static unsigned int intel_pt_cache_size(struct dso *dso,
332 struct machine *machine)
333{
334 off_t size;
335
336 size = dso__data_size(dso, machine);
337 size /= intel_pt_cache_divisor();
338 if (size < 1000)
339 return 10;
340 if (size > (1 << 21))
341 return 21;
342 return 32 - __builtin_clz(size);
343}
344
345static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
346 struct machine *machine)
347{
348 struct auxtrace_cache *c;
349 unsigned int bits;
350
351 if (dso->auxtrace_cache)
352 return dso->auxtrace_cache;
353
354 bits = intel_pt_cache_size(dso, machine);
355
356 /* Ignoring cache creation failure */
357 c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
358
359 dso->auxtrace_cache = c;
360
361 return c;
362}
363
364static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
365 u64 offset, u64 insn_cnt, u64 byte_cnt,
366 struct intel_pt_insn *intel_pt_insn)
367{
368 struct auxtrace_cache *c = intel_pt_cache(dso, machine);
369 struct intel_pt_cache_entry *e;
370 int err;
371
372 if (!c)
373 return -ENOMEM;
374
375 e = auxtrace_cache__alloc_entry(c);
376 if (!e)
377 return -ENOMEM;
378
379 e->insn_cnt = insn_cnt;
380 e->byte_cnt = byte_cnt;
381 e->op = intel_pt_insn->op;
382 e->branch = intel_pt_insn->branch;
383 e->length = intel_pt_insn->length;
384 e->rel = intel_pt_insn->rel;
385
386 err = auxtrace_cache__add(c, offset, &e->entry);
387 if (err)
388 auxtrace_cache__free_entry(c, e);
389
390 return err;
391}
392
393static struct intel_pt_cache_entry *
394intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
395{
396 struct auxtrace_cache *c = intel_pt_cache(dso, machine);
397
398 if (!c)
399 return NULL;
400
401 return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
402}
403
404static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
405 uint64_t *insn_cnt_ptr, uint64_t *ip,
406 uint64_t to_ip, uint64_t max_insn_cnt,
407 void *data)
408{
409 struct intel_pt_queue *ptq = data;
410 struct machine *machine = ptq->pt->machine;
411 struct thread *thread;
412 struct addr_location al;
413 unsigned char buf[1024];
414 size_t bufsz;
415 ssize_t len;
416 int x86_64;
417 u8 cpumode;
418 u64 offset, start_offset, start_ip;
419 u64 insn_cnt = 0;
420 bool one_map = true;
421
422 if (to_ip && *ip == to_ip)
423 goto out_no_cache;
424
425 bufsz = intel_pt_insn_max_size();
426
427 if (*ip >= ptq->pt->kernel_start)
428 cpumode = PERF_RECORD_MISC_KERNEL;
429 else
430 cpumode = PERF_RECORD_MISC_USER;
431
432 thread = ptq->thread;
433 if (!thread) {
434 if (cpumode != PERF_RECORD_MISC_KERNEL)
435 return -EINVAL;
436 thread = ptq->pt->unknown_thread;
437 }
438
439 while (1) {
440 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
441 if (!al.map || !al.map->dso)
442 return -EINVAL;
443
444 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
445 dso__data_status_seen(al.map->dso,
446 DSO_DATA_STATUS_SEEN_ITRACE))
447 return -ENOENT;
448
449 offset = al.map->map_ip(al.map, *ip);
450
451 if (!to_ip && one_map) {
452 struct intel_pt_cache_entry *e;
453
454 e = intel_pt_cache_lookup(al.map->dso, machine, offset);
455 if (e &&
456 (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
457 *insn_cnt_ptr = e->insn_cnt;
458 *ip += e->byte_cnt;
459 intel_pt_insn->op = e->op;
460 intel_pt_insn->branch = e->branch;
461 intel_pt_insn->length = e->length;
462 intel_pt_insn->rel = e->rel;
463 intel_pt_log_insn_no_data(intel_pt_insn, *ip);
464 return 0;
465 }
466 }
467
468 start_offset = offset;
469 start_ip = *ip;
470
471 /* Load maps to ensure dso->is_64_bit has been updated */
472 map__load(al.map, machine->symbol_filter);
473
474 x86_64 = al.map->dso->is_64_bit;
475
476 while (1) {
477 len = dso__data_read_offset(al.map->dso, machine,
478 offset, buf, bufsz);
479 if (len <= 0)
480 return -EINVAL;
481
482 if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
483 return -EINVAL;
484
485 intel_pt_log_insn(intel_pt_insn, *ip);
486
487 insn_cnt += 1;
488
489 if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
490 goto out;
491
492 if (max_insn_cnt && insn_cnt >= max_insn_cnt)
493 goto out_no_cache;
494
495 *ip += intel_pt_insn->length;
496
497 if (to_ip && *ip == to_ip)
498 goto out_no_cache;
499
500 if (*ip >= al.map->end)
501 break;
502
503 offset += intel_pt_insn->length;
504 }
505 one_map = false;
506 }
507out:
508 *insn_cnt_ptr = insn_cnt;
509
510 if (!one_map)
511 goto out_no_cache;
512
513 /*
514 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
515 * entries.
516 */
517 if (to_ip) {
518 struct intel_pt_cache_entry *e;
519
520 e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
521 if (e)
522 return 0;
523 }
524
525 /* Ignore cache errors */
526 intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
527 *ip - start_ip, intel_pt_insn);
528
529 return 0;
530
531out_no_cache:
532 *insn_cnt_ptr = insn_cnt;
533 return 0;
534}
535
536static bool intel_pt_get_config(struct intel_pt *pt,
537 struct perf_event_attr *attr, u64 *config)
538{
539 if (attr->type == pt->pmu_type) {
540 if (config)
541 *config = attr->config;
542 return true;
543 }
544
545 return false;
546}
547
548static bool intel_pt_exclude_kernel(struct intel_pt *pt)
549{
550 struct perf_evsel *evsel;
551
552 evlist__for_each(pt->session->evlist, evsel) {
553 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
554 !evsel->attr.exclude_kernel)
555 return false;
556 }
557 return true;
558}
559
560static bool intel_pt_return_compression(struct intel_pt *pt)
561{
562 struct perf_evsel *evsel;
563 u64 config;
564
565 if (!pt->noretcomp_bit)
566 return true;
567
568 evlist__for_each(pt->session->evlist, evsel) {
569 if (intel_pt_get_config(pt, &evsel->attr, &config) &&
570 (config & pt->noretcomp_bit))
571 return false;
572 }
573 return true;
574}
575
576static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
577{
578 struct perf_evsel *evsel;
579 unsigned int shift;
580 u64 config;
581
582 if (!pt->mtc_freq_bits)
583 return 0;
584
585 for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
586 config >>= 1;
587
588 evlist__for_each(pt->session->evlist, evsel) {
589 if (intel_pt_get_config(pt, &evsel->attr, &config))
590 return (config & pt->mtc_freq_bits) >> shift;
591 }
592 return 0;
593}
594
595static bool intel_pt_timeless_decoding(struct intel_pt *pt)
596{
597 struct perf_evsel *evsel;
598 bool timeless_decoding = true;
599 u64 config;
600
601 if (!pt->tsc_bit || !pt->cap_user_time_zero)
602 return true;
603
604 evlist__for_each(pt->session->evlist, evsel) {
605 if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
606 return true;
607 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
608 if (config & pt->tsc_bit)
609 timeless_decoding = false;
610 else
611 return true;
612 }
613 }
614 return timeless_decoding;
615}
616
617static bool intel_pt_tracing_kernel(struct intel_pt *pt)
618{
619 struct perf_evsel *evsel;
620
621 evlist__for_each(pt->session->evlist, evsel) {
622 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
623 !evsel->attr.exclude_kernel)
624 return true;
625 }
626 return false;
627}
628
629static bool intel_pt_have_tsc(struct intel_pt *pt)
630{
631 struct perf_evsel *evsel;
632 bool have_tsc = false;
633 u64 config;
634
635 if (!pt->tsc_bit)
636 return false;
637
638 evlist__for_each(pt->session->evlist, evsel) {
639 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
640 if (config & pt->tsc_bit)
641 have_tsc = true;
642 else
643 return false;
644 }
645 }
646 return have_tsc;
647}
648
649static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
650{
651 u64 quot, rem;
652
653 quot = ns / pt->tc.time_mult;
654 rem = ns % pt->tc.time_mult;
655 return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
656 pt->tc.time_mult;
657}
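/*
 * The conversion above inverts perf's TSC-to-nanosecond formula,
 * time = (cyc * time_mult) >> time_shift, so ticks ~= (ns << shift) / mult.
 * Splitting ns into quotient and remainder by time_mult keeps the
 * intermediate shift from overflowing 64 bits for large ns. A
 * standalone restatement (hypothetical helper, illustration only):
 *
 *	static u64 ns_to_ticks(u64 ns, u64 mult, u64 shift)
 *	{
 *		u64 quot = ns / mult;
 *		u64 rem  = ns % mult;
 *
 *		return (quot << shift) + (rem << shift) / mult;
 *	}
 */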
658
659static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
660 unsigned int queue_nr)
661{
662 struct intel_pt_params params = { .get_trace = 0, };
663 struct intel_pt_queue *ptq;
664
665 ptq = zalloc(sizeof(struct intel_pt_queue));
666 if (!ptq)
667 return NULL;
668
669 if (pt->synth_opts.callchain) {
670 size_t sz = sizeof(struct ip_callchain);
671
672 sz += pt->synth_opts.callchain_sz * sizeof(u64);
673 ptq->chain = zalloc(sz);
674 if (!ptq->chain)
675 goto out_free;
676 }
677
678 ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
679 if (!ptq->event_buf)
680 goto out_free;
681
682 ptq->pt = pt;
683 ptq->queue_nr = queue_nr;
684 ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
685 ptq->pid = -1;
686 ptq->tid = -1;
687 ptq->cpu = -1;
688 ptq->next_tid = -1;
689
690 params.get_trace = intel_pt_get_trace;
691 params.walk_insn = intel_pt_walk_next_insn;
692 params.data = ptq;
693 params.return_compression = intel_pt_return_compression(pt);
694 params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
695 params.mtc_period = intel_pt_mtc_period(pt);
696 params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
697 params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
698
699 if (pt->synth_opts.instructions) {
700 if (pt->synth_opts.period) {
701 switch (pt->synth_opts.period_type) {
702 case PERF_ITRACE_PERIOD_INSTRUCTIONS:
703 params.period_type =
704 INTEL_PT_PERIOD_INSTRUCTIONS;
705 params.period = pt->synth_opts.period;
706 break;
707 case PERF_ITRACE_PERIOD_TICKS:
708 params.period_type = INTEL_PT_PERIOD_TICKS;
709 params.period = pt->synth_opts.period;
710 break;
711 case PERF_ITRACE_PERIOD_NANOSECS:
712 params.period_type = INTEL_PT_PERIOD_TICKS;
713 params.period = intel_pt_ns_to_ticks(pt,
714 pt->synth_opts.period);
715 break;
716 default:
717 break;
718 }
719 }
720
721 if (!params.period) {
722 params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
723 params.period = 1000;
724 }
725 }
726
727 ptq->decoder = intel_pt_decoder_new(&params);
728 if (!ptq->decoder)
729 goto out_free;
730
731 return ptq;
732
733out_free:
734 zfree(&ptq->event_buf);
735 zfree(&ptq->chain);
736 free(ptq);
737 return NULL;
738}
739
740static void intel_pt_free_queue(void *priv)
741{
742 struct intel_pt_queue *ptq = priv;
743
744 if (!ptq)
745 return;
746 thread__zput(ptq->thread);
747 intel_pt_decoder_free(ptq->decoder);
748 zfree(&ptq->event_buf);
749 zfree(&ptq->chain);
750 free(ptq);
751}
752
753static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
754 struct auxtrace_queue *queue)
755{
756 struct intel_pt_queue *ptq = queue->priv;
757
758 if (queue->tid == -1 || pt->have_sched_switch) {
759 ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
760 thread__zput(ptq->thread);
761 }
762
763 if (!ptq->thread && ptq->tid != -1)
764 ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
765
766 if (ptq->thread) {
767 ptq->pid = ptq->thread->pid_;
768 if (queue->cpu == -1)
769 ptq->cpu = ptq->thread->cpu;
770 }
771}
772
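/*
 * Derive PERF_IP_FLAG_* bits from the decoder state: transaction
 * aborts; asynchronous branches (an interrupt when the target IP is
 * known, otherwise end of trace); ordinary branches typed by their
 * instruction opcode, or begin of trace when there is no source IP.
 * In-transaction execution is flagged separately.
 */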
773static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
774{
775 if (ptq->state->flags & INTEL_PT_ABORT_TX) {
776 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
777 } else if (ptq->state->flags & INTEL_PT_ASYNC) {
778 if (ptq->state->to_ip)
779 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
780 PERF_IP_FLAG_ASYNC |
781 PERF_IP_FLAG_INTERRUPT;
782 else
783 ptq->flags = PERF_IP_FLAG_BRANCH |
784 PERF_IP_FLAG_TRACE_END;
785 ptq->insn_len = 0;
786 } else {
787 if (ptq->state->from_ip)
788 ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
789 else
790 ptq->flags = PERF_IP_FLAG_BRANCH |
791 PERF_IP_FLAG_TRACE_BEGIN;
792 if (ptq->state->flags & INTEL_PT_IN_TX)
793 ptq->flags |= PERF_IP_FLAG_IN_TX;
794 ptq->insn_len = ptq->state->insn_len;
795 }
796}
797
798static int intel_pt_setup_queue(struct intel_pt *pt,
799 struct auxtrace_queue *queue,
800 unsigned int queue_nr)
801{
802 struct intel_pt_queue *ptq = queue->priv;
803
804 if (list_empty(&queue->head))
805 return 0;
806
807 if (!ptq) {
808 ptq = intel_pt_alloc_queue(pt, queue_nr);
809 if (!ptq)
810 return -ENOMEM;
811 queue->priv = ptq;
812
813 if (queue->cpu != -1)
814 ptq->cpu = queue->cpu;
815 ptq->tid = queue->tid;
816
817 if (pt->sampling_mode) {
818 if (pt->timeless_decoding)
819 ptq->step_through_buffers = true;
820 if (pt->timeless_decoding || !pt->have_sched_switch)
821 ptq->use_buffer_pid_tid = true;
822 }
823 }
824
825 if (!ptq->on_heap &&
826 (!pt->sync_switch ||
827 ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
828 const struct intel_pt_state *state;
829 int ret;
830
831 if (pt->timeless_decoding)
832 return 0;
833
834 intel_pt_log("queue %u getting timestamp\n", queue_nr);
835 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
836 queue_nr, ptq->cpu, ptq->pid, ptq->tid);
837 while (1) {
838 state = intel_pt_decode(ptq->decoder);
839 if (state->err) {
840 if (state->err == INTEL_PT_ERR_NODATA) {
841 intel_pt_log("queue %u has no timestamp\n",
842 queue_nr);
843 return 0;
844 }
845 continue;
846 }
847 if (state->timestamp)
848 break;
849 }
850
851 ptq->timestamp = state->timestamp;
852 intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
853 queue_nr, ptq->timestamp);
854 ptq->state = state;
855 ptq->have_sample = true;
856 intel_pt_sample_flags(ptq);
857 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
858 if (ret)
859 return ret;
860 ptq->on_heap = true;
861 }
862
863 return 0;
864}
865
866static int intel_pt_setup_queues(struct intel_pt *pt)
867{
868 unsigned int i;
869 int ret;
870
871 for (i = 0; i < pt->queues.nr_queues; i++) {
872 ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
873 if (ret)
874 return ret;
875 }
876 return 0;
877}
878
879static int intel_pt_inject_event(union perf_event *event,
880 struct perf_sample *sample, u64 type,
881 bool swapped)
882{
883 event->header.size = perf_event__sample_event_size(sample, type, 0);
884 return perf_event__synthesize_sample(event, type, 0, sample, swapped);
885}
886
887static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
888{
889 int ret;
890 struct intel_pt *pt = ptq->pt;
891 union perf_event *event = ptq->event_buf;
892 struct perf_sample sample = { .ip = 0, };
893
894 event->sample.header.type = PERF_RECORD_SAMPLE;
895 event->sample.header.misc = PERF_RECORD_MISC_USER;
896 event->sample.header.size = sizeof(struct perf_event_header);
897
898 if (!pt->timeless_decoding)
899 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
900
901 sample.ip = ptq->state->from_ip;
902 sample.pid = ptq->pid;
903 sample.tid = ptq->tid;
904 sample.addr = ptq->state->to_ip;
905 sample.id = ptq->pt->branches_id;
906 sample.stream_id = ptq->pt->branches_id;
907 sample.period = 1;
908 sample.cpu = ptq->cpu;
909 sample.flags = ptq->flags;
910 sample.insn_len = ptq->insn_len;
911
912 if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
913 return 0;
914
915 if (pt->synth_opts.inject) {
916 ret = intel_pt_inject_event(event, &sample,
917 pt->branches_sample_type,
918 pt->synth_needs_swap);
919 if (ret)
920 return ret;
921 }
922
923 ret = perf_session__deliver_synth_event(pt->session, event, &sample);
924 if (ret)
925 pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
926 ret);
927
928 return ret;
929}
930
931static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
932{
933 int ret;
934 struct intel_pt *pt = ptq->pt;
935 union perf_event *event = ptq->event_buf;
936 struct perf_sample sample = { .ip = 0, };
937
938 event->sample.header.type = PERF_RECORD_SAMPLE;
939 event->sample.header.misc = PERF_RECORD_MISC_USER;
940 event->sample.header.size = sizeof(struct perf_event_header);
941
942 if (!pt->timeless_decoding)
943 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
944
945 sample.ip = ptq->state->from_ip;
946 sample.pid = ptq->pid;
947 sample.tid = ptq->tid;
948 sample.addr = ptq->state->to_ip;
949 sample.id = ptq->pt->instructions_id;
950 sample.stream_id = ptq->pt->instructions_id;
951 sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
952 sample.cpu = ptq->cpu;
953 sample.flags = ptq->flags;
954 sample.insn_len = ptq->insn_len;
955
956 ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
957
958 if (pt->synth_opts.callchain) {
959 thread_stack__sample(ptq->thread, ptq->chain,
960 pt->synth_opts.callchain_sz, sample.ip);
961 sample.callchain = ptq->chain;
962 }
963
964 if (pt->synth_opts.inject) {
965 ret = intel_pt_inject_event(event, &sample,
966 pt->instructions_sample_type,
967 pt->synth_needs_swap);
968 if (ret)
969 return ret;
970 }
971
972 ret = perf_session__deliver_synth_event(pt->session, event, &sample);
973 if (ret)
974 pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
975 ret);
976
977 return ret;
978}
979
980static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
981{
982 int ret;
983 struct intel_pt *pt = ptq->pt;
984 union perf_event *event = ptq->event_buf;
985 struct perf_sample sample = { .ip = 0, };
986
987 event->sample.header.type = PERF_RECORD_SAMPLE;
988 event->sample.header.misc = PERF_RECORD_MISC_USER;
989 event->sample.header.size = sizeof(struct perf_event_header);
990
991 if (!pt->timeless_decoding)
992 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
993
994 sample.ip = ptq->state->from_ip;
995 sample.pid = ptq->pid;
996 sample.tid = ptq->tid;
997 sample.addr = ptq->state->to_ip;
998 sample.id = ptq->pt->transactions_id;
999 sample.stream_id = ptq->pt->transactions_id;
1000 sample.period = 1;
1001 sample.cpu = ptq->cpu;
1002 sample.flags = ptq->flags;
1003 sample.insn_len = ptq->insn_len;
1004
1005 if (pt->synth_opts.callchain) {
1006 thread_stack__sample(ptq->thread, ptq->chain,
1007 pt->synth_opts.callchain_sz, sample.ip);
1008 sample.callchain = ptq->chain;
1009 }
1010
1011 if (pt->synth_opts.inject) {
1012 ret = intel_pt_inject_event(event, &sample,
1013 pt->transactions_sample_type,
1014 pt->synth_needs_swap);
1015 if (ret)
1016 return ret;
1017 }
1018
1019 ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1020 if (ret)
1021 pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
1022 ret);
1023
1024 return ret;
1025}
1026
1027static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1028 pid_t pid, pid_t tid, u64 ip)
1029{
1030 union perf_event event;
1031 char msg[MAX_AUXTRACE_ERROR_MSG];
1032 int err;
1033
1034 intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1035
1036 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1037 code, cpu, pid, tid, ip, msg);
1038
1039 err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1040 if (err)
1041 pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1042 err);
1043
1044 return err;
1045}
1046
1047static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1048{
1049 struct auxtrace_queue *queue;
1050 pid_t tid = ptq->next_tid;
1051 int err;
1052
1053 if (tid == -1)
1054 return 0;
1055
1056 intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1057
1058 err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1059
1060 queue = &pt->queues.queue_array[ptq->queue_nr];
1061 intel_pt_set_pid_tid_cpu(pt, queue);
1062
1063 ptq->next_tid = -1;
1064
1065 return err;
1066}
1067
1068static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1069{
1070 struct intel_pt *pt = ptq->pt;
1071
1072 return ip == pt->switch_ip &&
1073 (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1074 !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1075 PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1076}
1077
1078static int intel_pt_sample(struct intel_pt_queue *ptq)
1079{
1080 const struct intel_pt_state *state = ptq->state;
1081 struct intel_pt *pt = ptq->pt;
1082 int err;
1083
1084 if (!ptq->have_sample)
1085 return 0;
1086
1087 ptq->have_sample = false;
1088
1089 if (pt->sample_instructions &&
1090 (state->type & INTEL_PT_INSTRUCTION)) {
1091 err = intel_pt_synth_instruction_sample(ptq);
1092 if (err)
1093 return err;
1094 }
1095
1096 if (pt->sample_transactions &&
1097 (state->type & INTEL_PT_TRANSACTION)) {
1098 err = intel_pt_synth_transaction_sample(ptq);
1099 if (err)
1100 return err;
1101 }
1102
1103 if (!(state->type & INTEL_PT_BRANCH))
1104 return 0;
1105
1106 if (pt->synth_opts.callchain)
1107 thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
1108 state->to_ip, ptq->insn_len,
1109 state->trace_nr);
1110 else
1111 thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
1112
1113 if (pt->sample_branches) {
1114 err = intel_pt_synth_branch_sample(ptq);
1115 if (err)
1116 return err;
1117 }
1118
1119 if (!pt->sync_switch)
1120 return 0;
1121
1122 if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1123 switch (ptq->switch_state) {
1124 case INTEL_PT_SS_UNKNOWN:
1125 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1126 err = intel_pt_next_tid(pt, ptq);
1127 if (err)
1128 return err;
1129 ptq->switch_state = INTEL_PT_SS_TRACING;
1130 break;
1131 default:
1132 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
1133 return 1;
1134 }
1135 } else if (!state->to_ip) {
1136 ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1137 } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
1138 ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1139 } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1140 state->to_ip == pt->ptss_ip &&
1141 (ptq->flags & PERF_IP_FLAG_CALL)) {
1142 ptq->switch_state = INTEL_PT_SS_TRACING;
1143 }
1144
1145 return 0;
1146}
1147
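/*
 * Find the address of the kernel's global __switch_to symbol, which
 * marks context switches in the trace. Also record the address of
 * perf_trace_sched_switch (ptss_ip), used to recognise that tracing
 * is active again while the switch state is still unknown.
 */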
1148static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip)
1149{
1150 struct map *map;
1151 struct symbol *sym, *start;
1152 u64 ip, switch_ip = 0;
1153
1154 if (ptss_ip)
1155 *ptss_ip = 0;
1156
1157 map = machine__kernel_map(machine, MAP__FUNCTION);
1158 if (!map)
1159 return 0;
1160
1161 if (map__load(map, machine->symbol_filter))
1162 return 0;
1163
1164 start = dso__first_symbol(map->dso, MAP__FUNCTION);
1165
1166 for (sym = start; sym; sym = dso__next_symbol(sym)) {
1167 if (sym->binding == STB_GLOBAL &&
1168 !strcmp(sym->name, "__switch_to")) {
1169 ip = map->unmap_ip(map, sym->start);
1170 if (ip >= map->start && ip < map->end) {
1171 switch_ip = ip;
1172 break;
1173 }
1174 }
1175 }
1176
1177 if (!switch_ip || !ptss_ip)
1178 return 0;
1179
1180 for (sym = start; sym; sym = dso__next_symbol(sym)) {
1181 if (!strcmp(sym->name, "perf_trace_sched_switch")) {
1182 ip = map->unmap_ip(map, sym->start);
1183 if (ip >= map->start && ip < map->end) {
1184 *ptss_ip = ip;
1185 break;
1186 }
1187 }
1188 }
1189
1190 return switch_ip;
1191}
1192
1193static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1194{
1195 const struct intel_pt_state *state = ptq->state;
1196 struct intel_pt *pt = ptq->pt;
1197 int err;
1198
1199 if (!pt->kernel_start) {
1200 pt->kernel_start = machine__kernel_start(pt->machine);
1201 if (pt->per_cpu_mmaps && pt->have_sched_switch &&
1202 !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
1203 !pt->sampling_mode) {
1204 pt->switch_ip = intel_pt_switch_ip(pt->machine,
1205 &pt->ptss_ip);
1206 if (pt->switch_ip) {
1207 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
1208 pt->switch_ip, pt->ptss_ip);
1209 pt->sync_switch = true;
1210 }
1211 }
1212 }
1213
1214 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1215 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1216 while (1) {
1217 err = intel_pt_sample(ptq);
1218 if (err)
1219 return err;
1220
1221 state = intel_pt_decode(ptq->decoder);
1222 if (state->err) {
1223 if (state->err == INTEL_PT_ERR_NODATA)
1224 return 1;
1225 if (pt->sync_switch &&
1226 state->from_ip >= pt->kernel_start) {
1227 pt->sync_switch = false;
1228 intel_pt_next_tid(pt, ptq);
1229 }
1230 if (pt->synth_opts.errors) {
1231 err = intel_pt_synth_error(pt, state->err,
1232 ptq->cpu, ptq->pid,
1233 ptq->tid,
1234 state->from_ip);
1235 if (err)
1236 return err;
1237 }
1238 continue;
1239 }
1240
1241 ptq->state = state;
1242 ptq->have_sample = true;
1243 intel_pt_sample_flags(ptq);
1244
1245 /* Use estimated TSC upon return to user space */
1246 if (pt->est_tsc &&
1247 (state->from_ip >= pt->kernel_start || !state->from_ip) &&
1248 state->to_ip && state->to_ip < pt->kernel_start) {
1249 intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1250 state->timestamp, state->est_timestamp);
1251 ptq->timestamp = state->est_timestamp;
1252 /* Use estimated TSC in unknown switch state */
1253 } else if (pt->sync_switch &&
1254 ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1255 intel_pt_is_switch_ip(ptq, state->to_ip) &&
1256 ptq->next_tid == -1) {
1257 intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1258 state->timestamp, state->est_timestamp);
1259 ptq->timestamp = state->est_timestamp;
1260 } else if (state->timestamp > ptq->timestamp) {
1261 ptq->timestamp = state->timestamp;
1262 }
1263
1264 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1265 *timestamp = ptq->timestamp;
1266 return 0;
1267 }
1268 }
1269 return 0;
1270}
1271
1272static inline int intel_pt_update_queues(struct intel_pt *pt)
1273{
1274 if (pt->queues.new_data) {
1275 pt->queues.new_data = false;
1276 return intel_pt_setup_queues(pt);
1277 }
1278 return 0;
1279}
1280
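/*
 * Decode queues in timestamp order: the auxtrace heap keeps the queue
 * with the oldest timestamp on top. Each queue is decoded only up to
 * the next queue's timestamp (capped at 'timestamp') and then pushed
 * back onto the heap at its new position.
 */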
1281static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1282{
1283 unsigned int queue_nr;
1284 u64 ts;
1285 int ret;
1286
1287 while (1) {
1288 struct auxtrace_queue *queue;
1289 struct intel_pt_queue *ptq;
1290
1291 if (!pt->heap.heap_cnt)
1292 return 0;
1293
1294 if (pt->heap.heap_array[0].ordinal >= timestamp)
1295 return 0;
1296
1297 queue_nr = pt->heap.heap_array[0].queue_nr;
1298 queue = &pt->queues.queue_array[queue_nr];
1299 ptq = queue->priv;
1300
1301 intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1302 queue_nr, pt->heap.heap_array[0].ordinal,
1303 timestamp);
1304
1305 auxtrace_heap__pop(&pt->heap);
1306
1307 if (pt->heap.heap_cnt) {
1308 ts = pt->heap.heap_array[0].ordinal + 1;
1309 if (ts > timestamp)
1310 ts = timestamp;
1311 } else {
1312 ts = timestamp;
1313 }
1314
1315 intel_pt_set_pid_tid_cpu(pt, queue);
1316
1317 ret = intel_pt_run_decoder(ptq, &ts);
1318
1319 if (ret < 0) {
1320 auxtrace_heap__add(&pt->heap, queue_nr, ts);
1321 return ret;
1322 }
1323
1324 if (!ret) {
1325 ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
1326 if (ret < 0)
1327 return ret;
1328 } else {
1329 ptq->on_heap = false;
1330 }
1331 }
1332
1333 return 0;
1334}
1335
1336static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
1337 u64 time_)
1338{
1339 struct auxtrace_queues *queues = &pt->queues;
1340 unsigned int i;
1341 u64 ts = 0;
1342
1343 for (i = 0; i < queues->nr_queues; i++) {
1344 struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1345 struct intel_pt_queue *ptq = queue->priv;
1346
1347 if (ptq && (tid == -1 || ptq->tid == tid)) {
1348 ptq->time = time_;
1349 intel_pt_set_pid_tid_cpu(pt, queue);
1350 intel_pt_run_decoder(ptq, &ts);
1351 }
1352 }
1353 return 0;
1354}
1355
1356static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
1357{
1358 return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1359 sample->pid, sample->tid, 0);
1360}
1361
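/*
 * Map a CPU number to its queue. The queue array index usually equals
 * the CPU number, so try that first, then scan downwards, and finally
 * check the remaining queues.
 */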
1362static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
1363{
1364 unsigned i, j;
1365
1366 if (cpu < 0 || !pt->queues.nr_queues)
1367 return NULL;
1368
1369 if ((unsigned)cpu >= pt->queues.nr_queues)
1370 i = pt->queues.nr_queues - 1;
1371 else
1372 i = cpu;
1373
1374 if (pt->queues.queue_array[i].cpu == cpu)
1375 return pt->queues.queue_array[i].priv;
1376
1377 for (j = 0; i > 0; j++) {
1378 if (pt->queues.queue_array[--i].cpu == cpu)
1379 return pt->queues.queue_array[i].priv;
1380 }
1381
1382 for (; j < pt->queues.nr_queues; j++) {
1383 if (pt->queues.queue_array[j].cpu == cpu)
1384 return pt->queues.queue_array[j].priv;
1385 }
1386
1387 return NULL;
1388}
1389
1390static int intel_pt_process_switch(struct intel_pt *pt,
1391 struct perf_sample *sample)
1392{
1393 struct intel_pt_queue *ptq;
1394 struct perf_evsel *evsel;
1395 pid_t tid;
1396 int cpu, err;
1397
1398 evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
1399 if (evsel != pt->switch_evsel)
1400 return 0;
1401
1402 tid = perf_evsel__intval(evsel, sample, "next_pid");
1403 cpu = sample->cpu;
1404
1405 intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1406 cpu, tid, sample->time, perf_time_to_tsc(sample->time,
1407 &pt->tc));
1408
1409 if (!pt->sync_switch)
1410 goto out;
1411
1412 ptq = intel_pt_cpu_to_ptq(pt, cpu);
1413 if (!ptq)
1414 goto out;
1415
1416 switch (ptq->switch_state) {
1417 case INTEL_PT_SS_NOT_TRACING:
1418 ptq->next_tid = -1;
1419 break;
1420 case INTEL_PT_SS_UNKNOWN:
1421 case INTEL_PT_SS_TRACING:
1422 ptq->next_tid = tid;
1423 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
1424 return 0;
1425 case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
1426 if (!ptq->on_heap) {
1427 ptq->timestamp = perf_time_to_tsc(sample->time,
1428 &pt->tc);
1429 err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
1430 ptq->timestamp);
1431 if (err)
1432 return err;
1433 ptq->on_heap = true;
1434 }
1435 ptq->switch_state = INTEL_PT_SS_TRACING;
1436 break;
1437 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1438 ptq->next_tid = tid;
1439 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1440 break;
1441 default:
1442 break;
1443 }
1444out:
1445 return machine__set_current_tid(pt->machine, cpu, -1, tid);
1446}
1447
1448static int intel_pt_process_itrace_start(struct intel_pt *pt,
1449 union perf_event *event,
1450 struct perf_sample *sample)
1451{
1452 if (!pt->per_cpu_mmaps)
1453 return 0;
1454
1455 intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1456 sample->cpu, event->itrace_start.pid,
1457 event->itrace_start.tid, sample->time,
1458 perf_time_to_tsc(sample->time, &pt->tc));
1459
1460 return machine__set_current_tid(pt->machine, sample->cpu,
1461 event->itrace_start.pid,
1462 event->itrace_start.tid);
1463}
1464
1465static int intel_pt_process_event(struct perf_session *session,
1466 union perf_event *event,
1467 struct perf_sample *sample,
1468 struct perf_tool *tool)
1469{
1470 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1471 auxtrace);
1472 u64 timestamp;
1473 int err = 0;
1474
1475 if (dump_trace)
1476 return 0;
1477
1478 if (!tool->ordered_events) {
1479 pr_err("Intel Processor Trace requires ordered events\n");
1480 return -EINVAL;
1481 }
1482
1483 if (sample->time && sample->time != (u64)-1)
1484 timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1485 else
1486 timestamp = 0;
1487
1488 if (timestamp || pt->timeless_decoding) {
1489 err = intel_pt_update_queues(pt);
1490 if (err)
1491 return err;
1492 }
1493
1494 if (pt->timeless_decoding) {
1495 if (event->header.type == PERF_RECORD_EXIT) {
1496 err = intel_pt_process_timeless_queues(pt,
1497 event->fork.tid,
1498 sample->time);
1499 }
1500 } else if (timestamp) {
1501 err = intel_pt_process_queues(pt, timestamp);
1502 }
1503 if (err)
1504 return err;
1505
1506 if (event->header.type == PERF_RECORD_AUX &&
1507 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
1508 pt->synth_opts.errors) {
1509 err = intel_pt_lost(pt, sample);
1510 if (err)
1511 return err;
1512 }
1513
1514 if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
1515 err = intel_pt_process_switch(pt, sample);
1516 else if (event->header.type == PERF_RECORD_ITRACE_START)
1517 err = intel_pt_process_itrace_start(pt, event, sample);
1518
1519 intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
1520 perf_event__name(event->header.type), event->header.type,
1521 sample->cpu, sample->time, timestamp);
1522
1523 return err;
1524}
1525
1526static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
1527{
1528 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1529 auxtrace);
1530 int ret;
1531
1532 if (dump_trace)
1533 return 0;
1534
1535 if (!tool->ordered_events)
1536 return -EINVAL;
1537
1538 ret = intel_pt_update_queues(pt);
1539 if (ret < 0)
1540 return ret;
1541
1542 if (pt->timeless_decoding)
1543 return intel_pt_process_timeless_queues(pt, -1,
1544 MAX_TIMESTAMP - 1);
1545
1546 return intel_pt_process_queues(pt, MAX_TIMESTAMP);
1547}
1548
1549static void intel_pt_free_events(struct perf_session *session)
1550{
1551 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1552 auxtrace);
1553 struct auxtrace_queues *queues = &pt->queues;
1554 unsigned int i;
1555
1556 for (i = 0; i < queues->nr_queues; i++) {
1557 intel_pt_free_queue(queues->queue_array[i].priv);
1558 queues->queue_array[i].priv = NULL;
1559 }
1560 intel_pt_log_disable();
1561 auxtrace_queues__free(queues);
1562}
1563
1564static void intel_pt_free(struct perf_session *session)
1565{
1566 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1567 auxtrace);
1568
1569 auxtrace_heap__free(&pt->heap);
1570 intel_pt_free_events(session);
1571 session->auxtrace = NULL;
1572 thread__delete(pt->unknown_thread);
1573 free(pt);
1574}
1575
1576static int intel_pt_process_auxtrace_event(struct perf_session *session,
1577 union perf_event *event,
1578 struct perf_tool *tool __maybe_unused)
1579{
1580 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1581 auxtrace);
1582
1583 if (pt->sampling_mode)
1584 return 0;
1585
1586 if (!pt->data_queued) {
1587 struct auxtrace_buffer *buffer;
1588 off_t data_offset;
1589 int fd = perf_data_file__fd(session->file);
1590 int err;
1591
1592 if (perf_data_file__is_pipe(session->file)) {
1593 data_offset = 0;
1594 } else {
1595 data_offset = lseek(fd, 0, SEEK_CUR);
1596 if (data_offset == -1)
1597 return -errno;
1598 }
1599
1600 err = auxtrace_queues__add_event(&pt->queues, session, event,
1601 data_offset, &buffer);
1602 if (err)
1603 return err;
1604
1605 /* Dump here, now that we have copied a piped trace out of the pipe */
1606 if (dump_trace) {
1607 if (auxtrace_buffer__get_data(buffer, fd)) {
1608 intel_pt_dump_event(pt, buffer->data,
1609 buffer->size);
1610 auxtrace_buffer__put_data(buffer);
1611 }
1612 }
1613 }
1614
1615 return 0;
1616}
1617
1618struct intel_pt_synth {
1619 struct perf_tool dummy_tool;
1620 struct perf_session *session;
1621};
1622
1623static int intel_pt_event_synth(struct perf_tool *tool,
1624 union perf_event *event,
1625 struct perf_sample *sample __maybe_unused,
1626 struct machine *machine __maybe_unused)
1627{
1628 struct intel_pt_synth *intel_pt_synth =
1629 container_of(tool, struct intel_pt_synth, dummy_tool);
1630
1631 return perf_session__deliver_synth_event(intel_pt_synth->session, event,
1632 NULL);
1633}
1634
1635static int intel_pt_synth_event(struct perf_session *session,
1636 struct perf_event_attr *attr, u64 id)
1637{
1638 struct intel_pt_synth intel_pt_synth;
1639
1640 memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
1641 intel_pt_synth.session = session;
1642
1643 return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
1644 &id, intel_pt_event_synth);
1645}
1646
1647static int intel_pt_synth_events(struct intel_pt *pt,
1648 struct perf_session *session)
1649{
1650 struct perf_evlist *evlist = session->evlist;
1651 struct perf_evsel *evsel;
1652 struct perf_event_attr attr;
1653 bool found = false;
1654 u64 id;
1655 int err;
1656
1657 evlist__for_each(evlist, evsel) {
1658 if (evsel->attr.type == pt->pmu_type && evsel->ids) {
1659 found = true;
1660 break;
1661 }
1662 }
1663
1664 if (!found) {
1665 pr_debug("There are no selected events with Intel Processor Trace data\n");
1666 return 0;
1667 }
1668
1669 memset(&attr, 0, sizeof(struct perf_event_attr));
1670 attr.size = sizeof(struct perf_event_attr);
1671 attr.type = PERF_TYPE_HARDWARE;
1672 attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
1673 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1674 PERF_SAMPLE_PERIOD;
1675 if (pt->timeless_decoding)
1676 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1677 else
1678 attr.sample_type |= PERF_SAMPLE_TIME;
1679 if (!pt->per_cpu_mmaps)
1680 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
1681 attr.exclude_user = evsel->attr.exclude_user;
1682 attr.exclude_kernel = evsel->attr.exclude_kernel;
1683 attr.exclude_hv = evsel->attr.exclude_hv;
1684 attr.exclude_host = evsel->attr.exclude_host;
1685 attr.exclude_guest = evsel->attr.exclude_guest;
1686 attr.sample_id_all = evsel->attr.sample_id_all;
1687 attr.read_format = evsel->attr.read_format;
1688
1689 id = evsel->id[0] + 1000000000;
1690 if (!id)
1691 id = 1;
1692
1693 if (pt->synth_opts.instructions) {
1694 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1695 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
1696 attr.sample_period =
1697 intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
1698 else
1699 attr.sample_period = pt->synth_opts.period;
1700 pt->instructions_sample_period = attr.sample_period;
1701 if (pt->synth_opts.callchain)
1702 attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1703 pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1704 id, (u64)attr.sample_type);
1705 err = intel_pt_synth_event(session, &attr, id);
1706 if (err) {
1707 pr_err("%s: failed to synthesize 'instructions' event type\n",
1708 __func__);
1709 return err;
1710 }
1711 pt->sample_instructions = true;
1712 pt->instructions_sample_type = attr.sample_type;
1713 pt->instructions_id = id;
1714 id += 1;
1715 }
1716
1717 if (pt->synth_opts.transactions) {
1718 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1719 attr.sample_period = 1;
1720 if (pt->synth_opts.callchain)
1721 attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1722 pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1723 id, (u64)attr.sample_type);
1724 err = intel_pt_synth_event(session, &attr, id);
1725 if (err) {
1726 pr_err("%s: failed to synthesize 'transactions' event type\n",
1727 __func__);
1728 return err;
1729 }
1730 pt->sample_transactions = true;
1731 pt->transactions_id = id;
1732 id += 1;
1733 evlist__for_each(evlist, evsel) {
1734 if (evsel->id && evsel->id[0] == pt->transactions_id) {
1735 if (evsel->name)
1736 zfree(&evsel->name);
1737 evsel->name = strdup("transactions");
1738 break;
1739 }
1740 }
1741 }
1742
1743 if (pt->synth_opts.branches) {
1744 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1745 attr.sample_period = 1;
1746 attr.sample_type |= PERF_SAMPLE_ADDR;
1747 attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
1748 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1749 id, (u64)attr.sample_type);
1750 err = intel_pt_synth_event(session, &attr, id);
1751 if (err) {
1752 pr_err("%s: failed to synthesize 'branches' event type\n",
1753 __func__);
1754 return err;
1755 }
1756 pt->sample_branches = true;
1757 pt->branches_sample_type = attr.sample_type;
1758 pt->branches_id = id;
1759 }
1760
1761 pt->synth_needs_swap = evsel->needs_swap;
1762
1763 return 0;
1764}
1765
1766static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
1767{
1768 struct perf_evsel *evsel;
1769
1770 evlist__for_each_reverse(evlist, evsel) {
1771 const char *name = perf_evsel__name(evsel);
1772
1773 if (!strcmp(name, "sched:sched_switch"))
1774 return evsel;
1775 }
1776
1777 return NULL;
1778}
1779
1780static const char * const intel_pt_info_fmts[] = {
1781 [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
1782 [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
1783 [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n",
1784 [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
1785 [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
1786 [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
1787 [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n",
1788 [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n",
1789 [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
1790 [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
1791 [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n",
1792 [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
1793 [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
1794 [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
1795};
1796
1797static void intel_pt_print_info(u64 *arr, int start, int finish)
1798{
1799 int i;
1800
1801 if (!dump_trace)
1802 return;
1803
1804 for (i = start; i <= finish; i++)
1805 fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
1806}
1807
1808int intel_pt_process_auxtrace_info(union perf_event *event,
1809 struct perf_session *session)
1810{
1811 struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
1812 size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
1813 struct intel_pt *pt;
1814 int err;
1815
1816 if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
1817 min_sz)
1818 return -EINVAL;
1819
1820 pt = zalloc(sizeof(struct intel_pt));
1821 if (!pt)
1822 return -ENOMEM;
1823
1824 err = auxtrace_queues__init(&pt->queues);
1825 if (err)
1826 goto err_free;
1827
1828 intel_pt_log_set_name(INTEL_PT_PMU_NAME);
1829
1830 pt->session = session;
1831 pt->machine = &session->machines.host; /* No kvm support */
1832 pt->auxtrace_type = auxtrace_info->type;
1833 pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
1834 pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
1835 pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
1836 pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
1837 pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
1838 pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
1839 pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
1840 pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
1841 pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
1842 pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
1843 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
1844 INTEL_PT_PER_CPU_MMAPS);
1845
1846 if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
1847 (sizeof(u64) * INTEL_PT_CYC_BIT)) {
1848 pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
1849 pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
1850 pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
1851 pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
1852 pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
1853 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
1854 INTEL_PT_CYC_BIT);
1855 }
1856
1857 pt->timeless_decoding = intel_pt_timeless_decoding(pt);
1858 pt->have_tsc = intel_pt_have_tsc(pt);
1859 pt->sampling_mode = false;
1860 pt->est_tsc = !pt->timeless_decoding;
1861
1862 pt->unknown_thread = thread__new(999999999, 999999999);
1863 if (!pt->unknown_thread) {
1864 err = -ENOMEM;
1865 goto err_free_queues;
1866 }
1867 err = thread__set_comm(pt->unknown_thread, "unknown", 0);
1868 if (err)
1869 goto err_delete_thread;
1870 if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
1871 err = -ENOMEM;
1872 goto err_delete_thread;
1873 }
1874
1875 pt->auxtrace.process_event = intel_pt_process_event;
1876 pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
1877 pt->auxtrace.flush_events = intel_pt_flush;
1878 pt->auxtrace.free_events = intel_pt_free_events;
1879 pt->auxtrace.free = intel_pt_free;
1880 session->auxtrace = &pt->auxtrace;
1881
1882 if (dump_trace)
1883 return 0;
1884
1885 if (pt->have_sched_switch == 1) {
1886 pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
1887 if (!pt->switch_evsel) {
1888 pr_err("%s: missing sched_switch event\n", __func__);
 err = -EINVAL;
1889 goto err_delete_thread;
1890 }
1891 }
1892
1893 if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
1894 pt->synth_opts = *session->itrace_synth_opts;
1895 } else {
1896 itrace_synth_opts__set_default(&pt->synth_opts);
1897 if (use_browser != -1) {
1898 pt->synth_opts.branches = false;
1899 pt->synth_opts.callchain = true;
1900 }
1901 }
1902
1903 if (pt->synth_opts.log)
1904 intel_pt_log_enable();
1905
1906 /* Maximum non-turbo ratio is TSC freq / 100 MHz */
1907 if (pt->tc.time_mult) {
1908 u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
1909
1910 pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
1911 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
1912 intel_pt_log("Maximum non-turbo ratio %u\n",
1913 pt->max_non_turbo_ratio);
1914 }
1915
1916 if (pt->synth_opts.calls)
1917 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
1918 PERF_IP_FLAG_TRACE_END;
1919 if (pt->synth_opts.returns)
1920 pt->branches_filter |= PERF_IP_FLAG_RETURN |
1921 PERF_IP_FLAG_TRACE_BEGIN;
1922
1923 if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
1924 symbol_conf.use_callchain = true;
1925 if (callchain_register_param(&callchain_param) < 0) {
1926 symbol_conf.use_callchain = false;
1927 pt->synth_opts.callchain = false;
1928 }
1929 }
1930
1931 err = intel_pt_synth_events(pt, session);
1932 if (err)
1933 goto err_delete_thread;
1934
1935 err = auxtrace_queues__process_index(&pt->queues, session);
1936 if (err)
1937 goto err_delete_thread;
1938
1939 if (pt->queues.populated)
1940 pt->data_queued = true;
1941
1942 if (pt->timeless_decoding)
1943 pr_debug2("Intel PT decoding without timestamps\n");
1944
1945 return 0;
1946
1947err_delete_thread:
1948 thread__delete(pt->unknown_thread);
1949err_free_queues:
1950 intel_pt_log_disable();
1951 auxtrace_queues__free(&pt->queues);
1952 session->auxtrace = NULL;
1953err_free:
1954 free(pt);
1955 return err;
1956}
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
new file mode 100644
index 000000000000..0065949df693
--- /dev/null
+++ b/tools/perf/util/intel-pt.h
@@ -0,0 +1,56 @@
1/*
2 * intel_pt.h: Intel Processor Trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef INCLUDE__PERF_INTEL_PT_H__
17#define INCLUDE__PERF_INTEL_PT_H__
18
19#define INTEL_PT_PMU_NAME "intel_pt"
20
21enum {
22 INTEL_PT_PMU_TYPE,
23 INTEL_PT_TIME_SHIFT,
24 INTEL_PT_TIME_MULT,
25 INTEL_PT_TIME_ZERO,
26 INTEL_PT_CAP_USER_TIME_ZERO,
27 INTEL_PT_TSC_BIT,
28 INTEL_PT_NORETCOMP_BIT,
29 INTEL_PT_HAVE_SCHED_SWITCH,
30 INTEL_PT_SNAPSHOT_MODE,
31 INTEL_PT_PER_CPU_MMAPS,
32 INTEL_PT_MTC_BIT,
33 INTEL_PT_MTC_FREQ_BITS,
34 INTEL_PT_TSC_CTC_N,
35 INTEL_PT_TSC_CTC_D,
36 INTEL_PT_CYC_BIT,
37 INTEL_PT_AUXTRACE_PRIV_MAX,
38};
39
40#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64))
41
42struct auxtrace_record;
43struct perf_tool;
44union perf_event;
45struct perf_session;
46struct perf_event_attr;
47struct perf_pmu;
48
49struct auxtrace_record *intel_pt_recording_init(int *err);
50
51int intel_pt_process_auxtrace_info(union perf_event *event,
52 struct perf_session *session);
53
54struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
55
56#endif
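
The enum above fixes the layout of the u64 priv[] array carried by the auxtrace info event; intel_pt_process_auxtrace_info() in intel-pt.c reads the entries back by the same indices, so writer and reader must agree on the order. A hypothetical writer-side sketch, with all values purely illustrative:

u64 priv[INTEL_PT_AUXTRACE_PRIV_MAX] = {
	[INTEL_PT_PMU_TYPE]           = 8,          /* dynamic PMU type */
	[INTEL_PT_TIME_SHIFT]         = 10,
	[INTEL_PT_TIME_MULT]          = 642,
	[INTEL_PT_TIME_ZERO]          = 0,
	[INTEL_PT_CAP_USER_TIME_ZERO] = 1,
	[INTEL_PT_TSC_BIT]            = 1ULL << 10, /* illustrative bit */
	[INTEL_PT_NORETCOMP_BIT]      = 1ULL << 11, /* illustrative bit */
	[INTEL_PT_HAVE_SCHED_SWITCH]  = 1,
	[INTEL_PT_SNAPSHOT_MODE]      = 0,
	[INTEL_PT_PER_CPU_MMAPS]      = 1,
};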
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
new file mode 100644
index 000000000000..4f6a4780bd5f
--- /dev/null
+++ b/tools/perf/util/llvm-utils.c
@@ -0,0 +1,408 @@
1/*
2 * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
3 * Copyright (C) 2015, Huawei Inc.
4 */
5
6#include <stdio.h>
7#include <sys/utsname.h>
8#include "util.h"
9#include "debug.h"
10#include "llvm-utils.h"
11#include "cache.h"
12
13#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \
14 "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS " \
15 "$KERNEL_INC_OPTIONS -Wno-unused-value " \
16 "-Wno-pointer-sign -working-directory " \
17 "$WORKING_DIR -c \"$CLANG_SOURCE\" -target bpf -O2 -o -"
18
19struct llvm_param llvm_param = {
20 .clang_path = "clang",
21 .clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE,
22 .clang_opt = NULL,
23 .kbuild_dir = NULL,
24 .kbuild_opts = NULL,
25 .user_set_param = false,
26};
27
28int perf_llvm_config(const char *var, const char *value)
29{
30 if (prefixcmp(var, "llvm."))
31 return 0;
32 var += sizeof("llvm.") - 1;
33
34 if (!strcmp(var, "clang-path"))
35 llvm_param.clang_path = strdup(value);
36 else if (!strcmp(var, "clang-bpf-cmd-template"))
37 llvm_param.clang_bpf_cmd_template = strdup(value);
38 else if (!strcmp(var, "clang-opt"))
39 llvm_param.clang_opt = strdup(value);
40 else if (!strcmp(var, "kbuild-dir"))
41 llvm_param.kbuild_dir = strdup(value);
42 else if (!strcmp(var, "kbuild-opts"))
43 llvm_param.kbuild_opts = strdup(value);
44 else
45 return -1;
46 llvm_param.user_set_param = true;
47 return 0;
48}
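/*
 * For reference, a ~/.perfconfig fragment exercising the keys parsed
 * above; every path and option value is purely illustrative:
 *
 *	[llvm]
 *		clang-path = /usr/local/bin/clang
 *		clang-opt = -g
 *		kbuild-dir = /lib/modules/4.2.0/build
 *		kbuild-opts = ARCH=x86_64
 */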
49
50static int
51search_program(const char *def, const char *name,
52 char *output)
53{
54 char *env, *path, *tmp = NULL;
55 char buf[PATH_MAX];
56 int ret;
57
58 output[0] = '\0';
59 if (def && def[0] != '\0') {
60 if (def[0] == '/') {
61 if (access(def, F_OK) == 0) {
62 strlcpy(output, def, PATH_MAX);
63 return 0;
64 }
65 } else if (def[0] != '\0')
66 name = def;
67 }
68
69 env = getenv("PATH");
70 if (!env)
71 return -1;
72 env = strdup(env);
73 if (!env)
74 return -1;
75
76 ret = -ENOENT;
77 path = strtok_r(env, ":", &tmp);
78 while (path) {
79 scnprintf(buf, sizeof(buf), "%s/%s", path, name);
80 if (access(buf, F_OK) == 0) {
81 strlcpy(output, buf, PATH_MAX);
82 ret = 0;
83 break;
84 }
85 path = strtok_r(NULL, ":", &tmp);
86 }
87
88 free(env);
89 return ret;
90}
91
92#define READ_SIZE 4096
93static int
94read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
95{
96 int err = 0;
97 void *buf = NULL;
98 FILE *file = NULL;
99 size_t read_sz = 0, buf_sz = 0;
100
101 file = popen(cmd, "r");
102 if (!file) {
103 pr_err("ERROR: unable to popen cmd: %s\n",
104 strerror(errno));
105 return -EINVAL;
106 }
107
108 while (!feof(file) && !ferror(file)) {
109 /*
110 * Make buf_sz always have one byte of extra space so we
111 * can put a '\0' there.
112 */
113 if (buf_sz - read_sz < READ_SIZE + 1) {
114 void *new_buf;
115
116 buf_sz = read_sz + READ_SIZE + 1;
117 new_buf = realloc(buf, buf_sz);
118
119 if (!new_buf) {
120 pr_err("ERROR: failed to realloc memory\n");
121 err = -ENOMEM;
122 goto errout;
123 }
124
125 buf = new_buf;
126 }
127 read_sz += fread(buf + read_sz, 1, READ_SIZE, file);
128 }
129
130 if (buf_sz - read_sz < 1) {
131 pr_err("ERROR: internal error\n");
132 err = -EINVAL;
133 goto errout;
134 }
135
136 if (ferror(file)) {
137 pr_err("ERROR: error occurred when reading from pipe: %s\n",
138 strerror(errno));
139 err = -EIO;
140 goto errout;
141 }
142
143 err = WEXITSTATUS(pclose(file));
144 file = NULL;
145 if (err) {
146 err = -EINVAL;
147 goto errout;
148 }
149
150 /*
151 * If buf is a string, give it a terminating '\0' to make our
152 * life easier. If buf is not a string, that '\0' lies beyond
153 * the size indicated by read_sz, so the caller won't even notice it.
154 */
155 ((char *)buf)[read_sz] = '\0';
156
157 if (!p_buf)
158 free(buf);
159 else
160 *p_buf = buf;
161
162 if (p_read_sz)
163 *p_read_sz = read_sz;
164 return 0;
165
166errout:
167 if (file)
168 pclose(file);
169 free(buf);
170 if (p_buf)
171 *p_buf = NULL;
172 if (p_read_sz)
173 *p_read_sz = 0;
174 return err;
175}
176
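A minimal caller of read_from_pipe() as defined above (the command is illustrative):

	void *out = NULL;
	size_t sz = 0;

	/* On success the buffer is '\0'-terminated, so it is safe to print. */
	if (!read_from_pipe("uname -r", &out, &sz))
		pr_debug("kernel release: %s", (char *)out);
	free(out);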
177static inline void
178force_set_env(const char *var, const char *value)
179{
180 if (value) {
181 setenv(var, value, 1);
182 pr_debug("set env: %s=%s\n", var, value);
183 } else {
184 unsetenv(var);
185 pr_debug("unset env: %s\n", var);
186 }
187}
188
189static void
190version_notice(void)
191{
192 pr_err(
193" \tLLVM 3.7 or newer is required. Which can be found from http://llvm.org\n"
194" \tYou may want to try git trunk:\n"
195" \t\tgit clone http://llvm.org/git/llvm.git\n"
196" \t\t and\n"
197" \t\tgit clone http://llvm.org/git/clang.git\n\n"
198" \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n"
199" \tdebian/ubuntu:\n"
200" \t\thttp://llvm.org/apt\n\n"
201" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
202" \toption in [llvm] section of ~/.perfconfig to:\n\n"
203" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n"
204" \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n"
205" \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n"
206" \t(Replace /path/to/llc with path to your llc)\n\n"
207);
208}
209
210static int detect_kbuild_dir(char **kbuild_dir)
211{
212 const char *test_dir = llvm_param.kbuild_dir;
213 const char *prefix_dir = "";
214 const char *suffix_dir = "";
215
216 char *autoconf_path;
217 struct utsname utsname;
218
219 int err;
220
221 if (!test_dir) {
222 err = uname(&utsname);
223 if (err) {
224 pr_warning("uname failed: %s\n", strerror(errno));
225 return -EINVAL;
226 }
227
228 test_dir = utsname.release;
229 prefix_dir = "/lib/modules/";
230 suffix_dir = "/build";
231 }
232
233 err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h",
234 prefix_dir, test_dir, suffix_dir);
235 if (err < 0)
236 return -ENOMEM;
237
238 if (access(autoconf_path, R_OK) == 0) {
239 free(autoconf_path);
240
241 err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir,
242 suffix_dir);
243 if (err < 0)
244 return -ENOMEM;
245 return 0;
246 }
247 free(autoconf_path);
248 return -ENOENT;
249}
250
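In the default case the function above ends up probing /lib/modules/$(uname -r)/build for include/generated/autoconf.h; a hedged usage sketch:

	char *kbuild_dir = NULL;

	if (!detect_kbuild_dir(&kbuild_dir)) {
		pr_debug("kbuild dir: %s\n", kbuild_dir);
		free(kbuild_dir);
	}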
251static const char *kinc_fetch_script =
252"#!/usr/bin/env sh\n"
253"if ! test -d \"$KBUILD_DIR\"\n"
254"then\n"
255" exit -1\n"
256"fi\n"
257"if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n"
258"then\n"
259" exit -1\n"
260"fi\n"
261"TMPDIR=`mktemp -d`\n"
262"if test -z \"$TMPDIR\"\n"
263"then\n"
264" exit -1\n"
265"fi\n"
266"cat << EOF > $TMPDIR/Makefile\n"
267"obj-y := dummy.o\n"
268"\\$(obj)/%.o: \\$(src)/%.c\n"
269"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n"
270"EOF\n"
271"touch $TMPDIR/dummy.c\n"
272"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n"
273"RET=$?\n"
274"rm -rf $TMPDIR\n"
275"exit $RET\n";
276
277static inline void
278get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts)
279{
280 int err;
281
282 if (!kbuild_dir || !kbuild_include_opts)
283 return;
284
285 *kbuild_dir = NULL;
286 *kbuild_include_opts = NULL;
287
288 if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) {
289 pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n");
290 pr_debug("Skip kbuild options detection.\n");
291 return;
292 }
293
294 err = detect_kbuild_dir(kbuild_dir);
295 if (err) {
296 pr_warning(
297"WARNING:\tunable to get correct kernel building directory.\n"
298"Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n"
299" \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n"
300" \tdetection.\n\n");
301 return;
302 }
303
304 pr_debug("Kernel build dir is set to %s\n", *kbuild_dir);
305 force_set_env("KBUILD_DIR", *kbuild_dir);
306 force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts);
307 err = read_from_pipe(kinc_fetch_script,
308 (void **)kbuild_include_opts,
309 NULL);
310 if (err) {
311 pr_warning(
312"WARNING:\tunable to get kernel include directories from '%s'\n"
313"Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n"
314" \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n"
315" \toption in [llvm] to \"\" to suppress this detection.\n\n",
316 *kbuild_dir);
317
318 free(*kbuild_dir);
319 *kbuild_dir = NULL;
320 return;
321 }
322
323 pr_debug("include option is set to %s\n", *kbuild_include_opts);
324}
325
326int llvm__compile_bpf(const char *path, void **p_obj_buf,
327 size_t *p_obj_buf_sz)
328{
329 int err;
330 char clang_path[PATH_MAX];
331 const char *clang_opt = llvm_param.clang_opt;
332 const char *template = llvm_param.clang_bpf_cmd_template;
333 char *kbuild_dir = NULL, *kbuild_include_opts = NULL;
334 void *obj_buf = NULL;
335 size_t obj_buf_sz;
336
337 if (!template)
338 template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
339
340 err = search_program(llvm_param.clang_path,
341 "clang", clang_path);
342 if (err) {
343 pr_err(
344"ERROR:\tunable to find clang.\n"
345"Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
346" \tand 'clang-path' option in [llvm] section of ~/.perfconfig.\n");
347 version_notice();
348 return -ENOENT;
349 }
350
351 /*
352 * This is optional. Even if it fails, we can continue our
353 * work. No need to check the error return.
354 */
355 get_kbuild_opts(&kbuild_dir, &kbuild_include_opts);
356
357 force_set_env("CLANG_EXEC", clang_path);
358 force_set_env("CLANG_OPTIONS", clang_opt);
359 force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
360 force_set_env("WORKING_DIR", kbuild_dir ? : ".");
361
362 /*
363 * Since we may reset clang's working directory, the path of
364 * the source file should be converted into an absolute path,
365 * unless we want stdin to be the source file (for testing).
366 */
367 force_set_env("CLANG_SOURCE",
368 (path[0] == '-') ? path :
369 make_nonrelative_path(path));
370
371 pr_debug("llvm compiling command template: %s\n", template);
372 err = read_from_pipe(template, &obj_buf, &obj_buf_sz);
373 if (err) {
374 pr_err("ERROR:\tunable to compile %s\n", path);
375 pr_err("Hint:\tCheck error message shown above.\n");
376 pr_err("Hint:\tYou can also pre-compile it into .o using:\n");
377 pr_err(" \t\tclang -target bpf -O2 -c %s\n", path);
378 pr_err(" \twith proper -I and -D options.\n");
379 goto errout;
380 }
381
382 free(kbuild_dir);
383 free(kbuild_include_opts);
384 if (!p_obj_buf)
385 free(obj_buf);
386 else
387 *p_obj_buf = obj_buf;
388
389 if (p_obj_buf_sz)
390 *p_obj_buf_sz = obj_buf_sz;
391 return 0;
392errout:
393 free(kbuild_dir);
394 free(kbuild_include_opts);
395 free(obj_buf);
396 if (p_obj_buf)
397 *p_obj_buf = NULL;
398 if (p_obj_buf_sz)
399 *p_obj_buf_sz = 0;
400 return err;
401}
402
403int llvm__search_clang(void)
404{
405 char clang_path[PATH_MAX];
406
407 return search_program(llvm_param.clang_path, "clang", clang_path);
408}
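Putting the pieces together, a caller compiling a BPF source into an in-memory ELF object would look roughly like this (path illustrative, error handling trimmed):

	void *obj_buf = NULL;
	size_t obj_buf_sz = 0;

	if (!llvm__compile_bpf("/tmp/sample_bpf.c", &obj_buf, &obj_buf_sz))
		pr_debug("compiled %zu bytes of BPF object code\n", obj_buf_sz);
	free(obj_buf);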
diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h
new file mode 100644
index 000000000000..5b3cf1c229e2
--- /dev/null
+++ b/tools/perf/util/llvm-utils.h
@@ -0,0 +1,49 @@
1/*
2 * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
3 * Copyright (C) 2015, Huawei Inc.
4 */
5#ifndef __LLVM_UTILS_H
6#define __LLVM_UTILS_H
7
8#include "debug.h"
9
10struct llvm_param {
11 /* Path of clang executable */
12 const char *clang_path;
13 /*
14 * Template for compiling BPF with clang. Five environment
15 * variables can be used:
16 * $CLANG_EXEC: Path to clang.
17 * $CLANG_OPTIONS: Extra options to clang.
18 * $KERNEL_INC_OPTIONS: Kernel include directories.
19 * $WORKING_DIR: Kernel source directory.
20 * $CLANG_SOURCE: Source file to be compiled.
21 */
22 const char *clang_bpf_cmd_template;
23 /* Will be filled into $CLANG_OPTIONS */
24 const char *clang_opt;
25 /* Where to find kbuild system */
26 const char *kbuild_dir;
27 /*
28 * Arguments passed to make, like 'ARCH=arm' if doing cross
29 * compiling. Should not be used for dynamic compiling.
30 */
31 const char *kbuild_opts;
32 /*
33 * Default is false. If any of the above fields is set by the user
34 * explicitly, user_set_param is set to true. This is used by
35 * 'perf test': if the user doesn't set anything in .perfconfig
36 * and clang is not found, don't trigger the llvm test.
37 */
38 bool user_set_param;
39};
40
41extern struct llvm_param llvm_param;
42extern int perf_llvm_config(const char *var, const char *value);
43
44extern int llvm__compile_bpf(const char *path, void **p_obj_buf,
45 size_t *p_obj_buf_sz);
46
47/* This function is for test__llvm() use only */
48extern int llvm__search_clang(void);
49#endif
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index f1a4c833121e..6309f7ceb08f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -250,7 +250,7 @@ struct machine *machines__findnew(struct machines *machines, pid_t pid)
250 static struct strlist *seen; 250 static struct strlist *seen;
251 251
252 if (!seen) 252 if (!seen)
253 seen = strlist__new(true, NULL); 253 seen = strlist__new(NULL, NULL);
254 254
255 if (!strlist__has_entry(seen, path)) { 255 if (!strlist__has_entry(seen, path)) {
256 pr_err("Can't access file %s\n", path); 256 pr_err("Can't access file %s\n", path);
@@ -550,6 +550,14 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
550 return 0; 550 return 0;
551} 551}
552 552
553int machine__process_switch_event(struct machine *machine __maybe_unused,
554 union perf_event *event)
555{
556 if (dump_trace)
557 perf_event__fprintf_switch(event, stdout);
558 return 0;
559}
560
553struct map *machine__findnew_module_map(struct machine *machine, u64 start, 561struct map *machine__findnew_module_map(struct machine *machine, u64 start,
554 const char *filename) 562 const char *filename)
555{ 563{
@@ -1467,6 +1475,9 @@ int machine__process_event(struct machine *machine, union perf_event *event,
1467 ret = machine__process_itrace_start_event(machine, event); break; 1475 ret = machine__process_itrace_start_event(machine, event); break;
1468 case PERF_RECORD_LOST_SAMPLES: 1476 case PERF_RECORD_LOST_SAMPLES:
1469 ret = machine__process_lost_samples_event(machine, event, sample); break; 1477 ret = machine__process_lost_samples_event(machine, event, sample); break;
1478 case PERF_RECORD_SWITCH:
1479 case PERF_RECORD_SWITCH_CPU_WIDE:
1480 ret = machine__process_switch_event(machine, event); break;
1470 default: 1481 default:
1471 ret = -1; 1482 ret = -1;
1472 break; 1483 break;
@@ -2009,3 +2020,17 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
2009{ 2020{
2010 return dsos__findnew(&machine->dsos, filename); 2021 return dsos__findnew(&machine->dsos, filename);
2011} 2022}
2023
2024char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
2025{
2026 struct machine *machine = vmachine;
2027 struct map *map;
2028 struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map, NULL);
2029
2030 if (sym == NULL)
2031 return NULL;
2032
2033 *modp = __map__is_kmodule(map) ? (char *)map->dso->short_name : NULL;
2034 *addrp = map->unmap_ip(map, sym->start);
2035 return sym->name;
2036}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 887798e511e9..ea5cb4a621db 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -87,6 +87,8 @@ int machine__process_aux_event(struct machine *machine,
87 union perf_event *event); 87 union perf_event *event);
88int machine__process_itrace_start_event(struct machine *machine, 88int machine__process_itrace_start_event(struct machine *machine,
89 union perf_event *event); 89 union perf_event *event);
90int machine__process_switch_event(struct machine *machine __maybe_unused,
91 union perf_event *event);
90int machine__process_mmap_event(struct machine *machine, union perf_event *event, 92int machine__process_mmap_event(struct machine *machine, union perf_event *event,
91 struct perf_sample *sample); 93 struct perf_sample *sample);
92int machine__process_mmap2_event(struct machine *machine, union perf_event *event, 94int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
@@ -237,5 +239,9 @@ int machine__synthesize_threads(struct machine *machine, struct target *target,
237pid_t machine__get_current_tid(struct machine *machine, int cpu); 239pid_t machine__get_current_tid(struct machine *machine, int cpu);
238int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, 240int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
239 pid_t tid); 241 pid_t tid);
242/*
243 * For use with libtraceevent's pevent_set_function_resolver()
244 */
245char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
240 246
241#endif /* __PERF_MACHINE_H */ 247#endif /* __PERF_MACHINE_H */
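The resolver declared above is intended to be handed to libtraceevent; a registration sketch, assuming the pevent_set_function_resolver() entry point and the usual session layout of this era:

	/* Sketch: let libtraceevent map kernel addresses through perf's symbol tables. */
	pevent_set_function_resolver(session->tevent.pevent,
				     machine__resolve_kernel_addr,
				     &session->machines.host);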
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index b5a5e9c02437..b1c475d9b240 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -224,6 +224,20 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
224 return map; 224 return map;
225} 225}
226 226
227/*
228 * Use this and __map__is_kmodule() for map instances that are in
229 * machine->kmaps, and thus have map->groups->machine all properly set, to
230 * disambiguate between the kernel and modules.
231 *
232 * When the need arises, introduce map__is_{kernel,kmodule}() that
233 * checks (map->groups != NULL && map->groups->machine != NULL &&
234 * map->dso->kernel) before calling __map__is_{kernel,kmodule}().
235 */
236bool __map__is_kernel(const struct map *map)
237{
238 return map->groups->machine->vmlinux_maps[map->type] == map;
239}
240
227static void map__exit(struct map *map) 241static void map__exit(struct map *map)
228{ 242{
229 BUG_ON(!RB_EMPTY_NODE(&map->rb_node)); 243 BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
@@ -334,9 +348,18 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
334 return dso__find_symbol_by_name(map->dso, map->type, name); 348 return dso__find_symbol_by_name(map->dso, map->type, name);
335} 349}
336 350
337struct map *map__clone(struct map *map) 351struct map *map__clone(struct map *from)
338{ 352{
339 return memdup(map, sizeof(*map)); 353 struct map *map = memdup(from, sizeof(*map));
354
355 if (map != NULL) {
356 atomic_set(&map->refcnt, 1);
357 RB_CLEAR_NODE(&map->rb_node);
358 dso__get(map->dso);
359 map->groups = NULL;
360 }
361
362 return map;
340} 363}
341 364
342int map__overlap(struct map *l, struct map *r) 365int map__overlap(struct map *l, struct map *r)
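The reworked map__clone() gives the copy an independent life cycle: its own refcount of 1, its own reference on the dso, a cleared rb_node, and no map_groups membership. A caller therefore owns the clone outright (map__put() assumed as the matching release):

	struct map *copy = map__clone(orig);

	if (copy != NULL) {
		/* use copy independently of orig */
		map__put(copy);
	}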
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index d73e687b224e..57829e89b78b 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -256,4 +256,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
256struct map *map_groups__find_by_name(struct map_groups *mg, 256struct map *map_groups__find_by_name(struct map_groups *mg,
257 enum map_type type, const char *name); 257 enum map_type type, const char *name);
258 258
259bool __map__is_kernel(const struct map *map);
260
261static inline bool __map__is_kmodule(const struct map *map)
262{
263 return !__map__is_kernel(map);
264}
265
259#endif /* __PERF_MAP_H */ 266#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 52be201b9b25..b1b9e2385f4b 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -220,6 +220,9 @@ static int __ordered_events__flush(struct ordered_events *oe)
220 else if (last_ts <= limit) 220 else if (last_ts <= limit)
221 oe->last = list_entry(head->prev, struct ordered_event, list); 221 oe->last = list_entry(head->prev, struct ordered_event, list);
222 222
223 if (show_progress)
224 ui_progress__finish();
225
223 return 0; 226 return 0;
224} 227}
225 228
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 09f8d2357108..d826e6f515db 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -276,7 +276,8 @@ const char *event_type(int type)
276static struct perf_evsel * 276static struct perf_evsel *
277__add_event(struct list_head *list, int *idx, 277__add_event(struct list_head *list, int *idx,
278 struct perf_event_attr *attr, 278 struct perf_event_attr *attr,
279 char *name, struct cpu_map *cpus) 279 char *name, struct cpu_map *cpus,
280 struct list_head *config_terms)
280{ 281{
281 struct perf_evsel *evsel; 282 struct perf_evsel *evsel;
282 283
@@ -291,14 +292,19 @@ __add_event(struct list_head *list, int *idx,
291 292
292 if (name) 293 if (name)
293 evsel->name = strdup(name); 294 evsel->name = strdup(name);
295
296 if (config_terms)
297 list_splice(config_terms, &evsel->config_terms);
298
294 list_add_tail(&evsel->node, list); 299 list_add_tail(&evsel->node, list);
295 return evsel; 300 return evsel;
296} 301}
297 302
298static int add_event(struct list_head *list, int *idx, 303static int add_event(struct list_head *list, int *idx,
299 struct perf_event_attr *attr, char *name) 304 struct perf_event_attr *attr, char *name,
305 struct list_head *config_terms)
300{ 306{
301 return __add_event(list, idx, attr, name, NULL) ? 0 : -ENOMEM; 307 return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM;
302} 308}
303 309
304static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) 310static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
@@ -377,7 +383,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
377 memset(&attr, 0, sizeof(attr)); 383 memset(&attr, 0, sizeof(attr));
378 attr.config = cache_type | (cache_op << 8) | (cache_result << 16); 384 attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
379 attr.type = PERF_TYPE_HW_CACHE; 385 attr.type = PERF_TYPE_HW_CACHE;
380 return add_event(list, idx, &attr, name); 386 return add_event(list, idx, &attr, name, NULL);
381} 387}
382 388
383static int add_tracepoint(struct list_head *list, int *idx, 389static int add_tracepoint(struct list_head *list, int *idx,
@@ -539,7 +545,7 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
539 attr.type = PERF_TYPE_BREAKPOINT; 545 attr.type = PERF_TYPE_BREAKPOINT;
540 attr.sample_period = 1; 546 attr.sample_period = 1;
541 547
542 return add_event(list, idx, &attr, NULL); 548 return add_event(list, idx, &attr, NULL, NULL);
543} 549}
544 550
545static int check_type_val(struct parse_events_term *term, 551static int check_type_val(struct parse_events_term *term,
@@ -590,7 +596,9 @@ do { \
590 break; 596 break;
591 case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: 597 case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
592 CHECK_TYPE_VAL(NUM); 598 CHECK_TYPE_VAL(NUM);
593 attr->sample_period = term->val.num; 599 break;
600 case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
601 CHECK_TYPE_VAL(NUM);
594 break; 602 break;
595 case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: 603 case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
596 /* 604 /*
@@ -598,6 +606,20 @@ do { \
598 * attr->branch_sample_type = term->val.num; 606 * attr->branch_sample_type = term->val.num;
599 */ 607 */
600 break; 608 break;
609 case PARSE_EVENTS__TERM_TYPE_TIME:
610 CHECK_TYPE_VAL(NUM);
611 if (term->val.num > 1) {
612 err->str = strdup("expected 0 or 1");
613 err->idx = term->err_val;
614 return -EINVAL;
615 }
616 break;
617 case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
618 CHECK_TYPE_VAL(STR);
619 break;
620 case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
621 CHECK_TYPE_VAL(NUM);
622 break;
601 case PARSE_EVENTS__TERM_TYPE_NAME: 623 case PARSE_EVENTS__TERM_TYPE_NAME:
602 CHECK_TYPE_VAL(STR); 624 CHECK_TYPE_VAL(STR);
603 break; 625 break;
@@ -622,22 +644,71 @@ static int config_attr(struct perf_event_attr *attr,
622 return 0; 644 return 0;
623} 645}
624 646
647static int get_config_terms(struct list_head *head_config,
648 struct list_head *head_terms __maybe_unused)
649{
650#define ADD_CONFIG_TERM(__type, __name, __val) \
651do { \
652 struct perf_evsel_config_term *__t; \
653 \
654 __t = zalloc(sizeof(*__t)); \
655 if (!__t) \
656 return -ENOMEM; \
657 \
658 INIT_LIST_HEAD(&__t->list); \
659 __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \
660 __t->val.__name = __val; \
661 list_add_tail(&__t->list, head_terms); \
662} while (0)
663
664 struct parse_events_term *term;
665
666 list_for_each_entry(term, head_config, list) {
667 switch (term->type_term) {
668 case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
669 ADD_CONFIG_TERM(PERIOD, period, term->val.num);
670 break;
671 case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
672 ADD_CONFIG_TERM(FREQ, freq, term->val.num);
673 break;
674 case PARSE_EVENTS__TERM_TYPE_TIME:
675 ADD_CONFIG_TERM(TIME, time, term->val.num);
676 break;
677 case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
678 ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
679 break;
680 case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
681 ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
682 break;
683 default:
684 break;
685 }
686 }
687#undef ADD_CONFIG_TERM
688 return 0;
689}
690
625int parse_events_add_numeric(struct parse_events_evlist *data, 691int parse_events_add_numeric(struct parse_events_evlist *data,
626 struct list_head *list, 692 struct list_head *list,
627 u32 type, u64 config, 693 u32 type, u64 config,
628 struct list_head *head_config) 694 struct list_head *head_config)
629{ 695{
630 struct perf_event_attr attr; 696 struct perf_event_attr attr;
697 LIST_HEAD(config_terms);
631 698
632 memset(&attr, 0, sizeof(attr)); 699 memset(&attr, 0, sizeof(attr));
633 attr.type = type; 700 attr.type = type;
634 attr.config = config; 701 attr.config = config;
635 702
636 if (head_config && 703 if (head_config) {
637 config_attr(&attr, head_config, data->error)) 704 if (config_attr(&attr, head_config, data->error))
638 return -EINVAL; 705 return -EINVAL;
706
707 if (get_config_terms(head_config, &config_terms))
708 return -ENOMEM;
709 }
639 710
640 return add_event(list, &data->idx, &attr, NULL); 711 return add_event(list, &data->idx, &attr, NULL, &config_terms);
641} 712}
642 713
643static int parse_events__is_name_term(struct parse_events_term *term) 714static int parse_events__is_name_term(struct parse_events_term *term)
@@ -664,6 +735,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
664 struct perf_pmu_info info; 735 struct perf_pmu_info info;
665 struct perf_pmu *pmu; 736 struct perf_pmu *pmu;
666 struct perf_evsel *evsel; 737 struct perf_evsel *evsel;
738 LIST_HEAD(config_terms);
667 739
668 pmu = perf_pmu__find(name); 740 pmu = perf_pmu__find(name);
669 if (!pmu) 741 if (!pmu)
@@ -678,7 +750,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
678 750
679 if (!head_config) { 751 if (!head_config) {
680 attr.type = pmu->type; 752 attr.type = pmu->type;
681 evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus); 753 evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus, NULL);
682 return evsel ? 0 : -ENOMEM; 754 return evsel ? 0 : -ENOMEM;
683 } 755 }
684 756
@@ -692,11 +764,15 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
692 if (config_attr(&attr, head_config, data->error)) 764 if (config_attr(&attr, head_config, data->error))
693 return -EINVAL; 765 return -EINVAL;
694 766
767 if (get_config_terms(head_config, &config_terms))
768 return -ENOMEM;
769
695 if (perf_pmu__config(pmu, &attr, head_config, data->error)) 770 if (perf_pmu__config(pmu, &attr, head_config, data->error))
696 return -EINVAL; 771 return -EINVAL;
697 772
698 evsel = __add_event(list, &data->idx, &attr, 773 evsel = __add_event(list, &data->idx, &attr,
699 pmu_event_name(head_config), pmu->cpus); 774 pmu_event_name(head_config), pmu->cpus,
775 &config_terms);
700 if (evsel) { 776 if (evsel) {
701 evsel->unit = info.unit; 777 evsel->unit = info.unit;
702 evsel->scale = info.scale; 778 evsel->scale = info.scale;
@@ -1065,8 +1141,13 @@ int parse_events(struct perf_evlist *evlist, const char *str,
1065 perf_pmu__parse_cleanup(); 1141 perf_pmu__parse_cleanup();
1066 if (!ret) { 1142 if (!ret) {
1067 int entries = data.idx - evlist->nr_entries; 1143 int entries = data.idx - evlist->nr_entries;
1144 struct perf_evsel *last;
1145
1068 perf_evlist__splice_list_tail(evlist, &data.list, entries); 1146 perf_evlist__splice_list_tail(evlist, &data.list, entries);
1069 evlist->nr_groups += data.nr_groups; 1147 evlist->nr_groups += data.nr_groups;
1148 last = perf_evlist__last(evlist);
1149 last->cmdline_group_boundary = true;
1150
1070 return 0; 1151 return 0;
1071 } 1152 }
1072 1153
@@ -1105,7 +1186,7 @@ static void parse_events_print_error(struct parse_events_error *err,
1105 * Maximum error index indent, we will cut 1186 * Maximum error index indent, we will cut
1106 * the event string if it's bigger. 1187 * the event string if it's bigger.
1107 */ 1188 */
1108 int max_err_idx = 10; 1189 int max_err_idx = 13;
1109 1190
1110 /* 1191 /*
1111 * Let's be specific with the message when 1192 * Let's be specific with the message when
@@ -1162,30 +1243,93 @@ int parse_events_option(const struct option *opt, const char *str,
1162 return ret; 1243 return ret;
1163} 1244}
1164 1245
1165int parse_filter(const struct option *opt, const char *str, 1246static int
1166 int unset __maybe_unused) 1247foreach_evsel_in_last_glob(struct perf_evlist *evlist,
1248 int (*func)(struct perf_evsel *evsel,
1249 const void *arg),
1250 const void *arg)
1167{ 1251{
1168 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
1169 struct perf_evsel *last = NULL; 1252 struct perf_evsel *last = NULL;
1253 int err;
1170 1254
1171 if (evlist->nr_entries > 0) 1255 if (evlist->nr_entries > 0)
1172 last = perf_evlist__last(evlist); 1256 last = perf_evlist__last(evlist);
1173 1257
1174 if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { 1258 do {
1259 err = (*func)(last, arg);
1260 if (err)
1261 return -1;
1262 if (!last)
1263 return 0;
1264
1265 if (last->node.prev == &evlist->entries)
1266 return 0;
1267 last = list_entry(last->node.prev, struct perf_evsel, node);
1268 } while (!last->cmdline_group_boundary);
1269
1270 return 0;
1271}
1272
1273static int set_filter(struct perf_evsel *evsel, const void *arg)
1274{
1275 const char *str = arg;
1276
1277 if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) {
1175 fprintf(stderr, 1278 fprintf(stderr,
1176 "--filter option should follow a -e tracepoint option\n"); 1279 "--filter option should follow a -e tracepoint option\n");
1177 return -1; 1280 return -1;
1178 } 1281 }
1179 1282
1180 last->filter = strdup(str); 1283 if (perf_evsel__append_filter(evsel, "&&", str) < 0) {
1181 if (last->filter == NULL) { 1284 fprintf(stderr,
1182 fprintf(stderr, "not enough memory to hold filter string\n"); 1285 "not enough memory to hold filter string\n");
1183 return -1; 1286 return -1;
1184 } 1287 }
1185 1288
1186 return 0; 1289 return 0;
1187} 1290}
1188 1291
1292int parse_filter(const struct option *opt, const char *str,
1293 int unset __maybe_unused)
1294{
1295 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
1296
1297 return foreach_evsel_in_last_glob(evlist, set_filter,
1298 (const void *)str);
1299}
1300
1301static int add_exclude_perf_filter(struct perf_evsel *evsel,
1302 const void *arg __maybe_unused)
1303{
1304 char new_filter[64];
1305
1306 if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) {
1307 fprintf(stderr,
1308 "--exclude-perf option should follow a -e tracepoint option\n");
1309 return -1;
1310 }
1311
1312 snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
1313
1314 if (perf_evsel__append_filter(evsel, "&&", new_filter) < 0) {
1315 fprintf(stderr,
1316 "not enough memory to hold filter string\n");
1317 return -1;
1318 }
1319
1320 return 0;
1321}
1322
1323int exclude_perf(const struct option *opt,
1324 const char *arg __maybe_unused,
1325 int unset __maybe_unused)
1326{
1327 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
1328
1329 return foreach_evsel_in_last_glob(evlist, add_exclude_perf_filter,
1330 NULL);
1331}
1332
1189static const char * const event_type_descriptors[] = { 1333static const char * const event_type_descriptors[] = {
1190 "Hardware event", 1334 "Hardware event",
1191 "Software event", 1335 "Software event",
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 131f29b2f132..a09b0e210997 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -34,6 +34,7 @@ extern int parse_events(struct perf_evlist *evlist, const char *str,
34 struct parse_events_error *error); 34 struct parse_events_error *error);
35extern int parse_events_terms(struct list_head *terms, const char *str); 35extern int parse_events_terms(struct list_head *terms, const char *str);
36extern int parse_filter(const struct option *opt, const char *str, int unset); 36extern int parse_filter(const struct option *opt, const char *str, int unset);
37extern int exclude_perf(const struct option *opt, const char *arg, int unset);
37 38
38#define EVENTS_HELP_MAX (128*1024) 39#define EVENTS_HELP_MAX (128*1024)
39 40
@@ -61,7 +62,11 @@ enum {
61 PARSE_EVENTS__TERM_TYPE_CONFIG2, 62 PARSE_EVENTS__TERM_TYPE_CONFIG2,
62 PARSE_EVENTS__TERM_TYPE_NAME, 63 PARSE_EVENTS__TERM_TYPE_NAME,
63 PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD, 64 PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
65 PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,
64 PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE, 66 PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
67 PARSE_EVENTS__TERM_TYPE_TIME,
68 PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
69 PARSE_EVENTS__TERM_TYPE_STACKSIZE,
65}; 70};
66 71
67struct parse_events_term { 72struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 13cef3c65565..936d566f48d8 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -182,7 +182,11 @@ config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
182config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } 182config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
183name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } 183name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
184period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } 184period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
185freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
185branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } 186branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
187time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
188call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
189stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
186, { return ','; } 190, { return ','; }
187"/" { BEGIN(INITIAL); return '/'; } 191"/" { BEGIN(INITIAL); return '/'; }
188{name_minus} { return str(yyscanner, PE_NAME); } 192{name_minus} { return str(yyscanner, PE_NAME); }
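The new lexer terms enable per-event settings directly in the event string; illustrative examples:

	# perf record -e 'cycles/period=100000,time=1/' -a sleep 1
	# perf record -e 'instructions/freq=4000,call-graph=dwarf,stack-size=8192/' -a sleep 1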
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7bcb8c315615..89c91a1a67e7 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -462,10 +462,6 @@ static struct perf_pmu *pmu_lookup(const char *name)
462 LIST_HEAD(aliases); 462 LIST_HEAD(aliases);
463 __u32 type; 463 __u32 type;
464 464
465 /* No support for intel_bts or intel_pt so disallow them */
466 if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt"))
467 return NULL;
468
469 /* 465 /*
470 * The pmu data we store & need consists of the pmu 466 * The pmu data we store & need consists of the pmu
471 * type value and format definitions. Load both right 467 * type value and format definitions. Load both right
@@ -542,7 +538,7 @@ struct perf_pmu *perf_pmu__find(const char *name)
542} 538}
543 539
544static struct perf_pmu_format * 540static struct perf_pmu_format *
545pmu_find_format(struct list_head *formats, char *name) 541pmu_find_format(struct list_head *formats, const char *name)
546{ 542{
547 struct perf_pmu_format *format; 543 struct perf_pmu_format *format;
548 544
@@ -553,6 +549,21 @@ pmu_find_format(struct list_head *formats, char *name)
553 return NULL; 549 return NULL;
554} 550}
555 551
552__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
553{
554 struct perf_pmu_format *format = pmu_find_format(formats, name);
555 __u64 bits = 0;
556 int fbit;
557
558 if (!format)
559 return 0;
560
561 for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
562 bits |= 1ULL << fbit;
563
564 return bits;
565}
566
556/* 567/*
557 * Sets value based on the format definition (format parameter) 568 * Sets value based on the format definition (format parameter)
558 * and unformatted value (value parameter). 569
@@ -574,6 +585,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
574 } 585 }
575} 586}
576 587
588static __u64 pmu_format_max_value(const unsigned long *format)
589{
590 int w;
591
592 w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
593 if (!w)
594 return 0;
595 if (w < 64)
596 return (1ULL << w) - 1;
597 return -1;
598}
599
577/* 600/*
578 * Term is a string term, and might be a param-term. Try to look up its value 601
579 * in the remaining terms. 602 * in the remaining terms.
@@ -607,7 +630,9 @@ static char *formats_error_string(struct list_head *formats)
607{ 630{
608 struct perf_pmu_format *format; 631 struct perf_pmu_format *format;
609 char *err, *str; 632 char *err, *str;
610 static const char *static_terms = "config,config1,config2,name,period,branch_type\n"; 633 static const char *static_terms = "config,config1,config2,name,"
634 "period,freq,branch_type,time,"
635 "call-graph,stack-size\n";
611 unsigned i = 0; 636 unsigned i = 0;
612 637
613 if (!asprintf(&str, "valid terms:")) 638 if (!asprintf(&str, "valid terms:"))
@@ -647,7 +672,7 @@ static int pmu_config_term(struct list_head *formats,
647{ 672{
648 struct perf_pmu_format *format; 673 struct perf_pmu_format *format;
649 __u64 *vp; 674 __u64 *vp;
650 __u64 val; 675 __u64 val, max_val;
651 676
652 /* 677 /*
653 * If this is a parameter we've already used for parameterized-eval, 678 * If this is a parameter we've already used for parameterized-eval,
@@ -713,6 +738,22 @@ static int pmu_config_term(struct list_head *formats,
713 } else 738 } else
714 return -EINVAL; 739 return -EINVAL;
715 740
741 max_val = pmu_format_max_value(format->bits);
742 if (val > max_val) {
743 if (err) {
744 err->idx = term->err_val;
745 if (asprintf(&err->str,
746 "value too big for format, maximum is %llu",
747 (unsigned long long)max_val) < 0)
748 err->str = strdup("value too big for format");
749 return -EINVAL;
750 }
751 /*
752 * Assume we don't care if !err, in which case the value will be
753 * silently truncated.
754 */
755 }
756
716 pmu_format_value(format->bits, val, vp, zero); 757 pmu_format_value(format->bits, val, vp, zero);
717 return 0; 758 return 0;
718} 759}
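To make the two new helpers concrete: for a sysfs format entry like event=config:0-7, perf_pmu__format_bits() returns a mask with bits 0-7 set and pmu_format_max_value() reports 255, so an out-of-range term value now produces a parse error (hedged sketch, format name illustrative):

	__u64 bits = perf_pmu__format_bits(&pmu->format, "event");

	/* bits == 0xff for "event=config:0-7"; a term value of 256 is now
	 * rejected with "value too big for format, maximum is 255" instead
	 * of being silently truncated.
	 */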
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 7b9c8cf8ae3e..5d7e84466bee 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats,
54 struct perf_event_attr *attr, 54 struct perf_event_attr *attr,
55 struct list_head *head_terms, 55 struct list_head *head_terms,
56 bool zero, struct parse_events_error *error); 56 bool zero, struct parse_events_error *error);
57__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
57int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, 58int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
58 struct perf_pmu_info *info); 59 struct perf_pmu_info *info);
59struct list_head *perf_pmu__alias(struct perf_pmu *pmu, 60struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 381f23a443c7..eb5f18b75402 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -45,6 +45,7 @@
45#include "trace-event.h" /* For __maybe_unused */ 45#include "trace-event.h" /* For __maybe_unused */
46#include "probe-event.h" 46#include "probe-event.h"
47#include "probe-finder.h" 47#include "probe-finder.h"
48#include "probe-file.h"
48#include "session.h" 49#include "session.h"
49 50
50#define MAX_CMDLEN 256 51#define MAX_CMDLEN 256
@@ -55,11 +56,7 @@ struct probe_conf probe_conf;
55 56
56#define semantic_error(msg ...) pr_err("Semantic error :" msg) 57#define semantic_error(msg ...) pr_err("Semantic error :" msg)
57 58
58/* If there is no space to write, returns -E2BIG. */ 59int e_snprintf(char *str, size_t size, const char *format, ...)
59static int e_snprintf(char *str, size_t size, const char *format, ...)
60 __attribute__((format(printf, 3, 4)));
61
62static int e_snprintf(char *str, size_t size, const char *format, ...)
63{ 60{
64 int ret; 61 int ret;
65 va_list ap; 62 va_list ap;
@@ -72,7 +69,6 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
72} 69}
73 70
74static char *synthesize_perf_probe_point(struct perf_probe_point *pp); 71static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
75static void clear_probe_trace_event(struct probe_trace_event *tev);
76static struct machine *host_machine; 72static struct machine *host_machine;
77 73
78/* Initialize symbol maps and path of vmlinux/modules */ 74/* Initialize symbol maps and path of vmlinux/modules */
@@ -519,7 +515,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
519 if (ret < 0) 515 if (ret < 0)
520 goto error; 516 goto error;
521 addr += stext; 517 addr += stext;
522 } else { 518 } else if (tp->symbol) {
523 addr = kernel_get_symbol_address_by_name(tp->symbol, false); 519 addr = kernel_get_symbol_address_by_name(tp->symbol, false);
524 if (addr == 0) 520 if (addr == 0)
525 goto error; 521 goto error;
@@ -709,9 +705,10 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
709 } 705 }
710 /* Error path : ntevs < 0 */ 706 /* Error path : ntevs < 0 */
711 pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); 707 pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
712 if (ntevs == -EBADF) { 708 if (ntevs < 0) {
713 pr_warning("Warning: No dwarf info found in the vmlinux - " 709 if (ntevs == -EBADF)
714 "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); 710 pr_warning("Warning: No dwarf info found in the vmlinux - "
711 "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
715 if (!need_dwarf) { 712 if (!need_dwarf) {
716 pr_debug("Trying to use symbols.\n"); 713 pr_debug("Trying to use symbols.\n");
717 return 0; 714 return 0;
@@ -1197,15 +1194,37 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
1197 *ptr++ = '\0'; 1194 *ptr++ = '\0';
1198 } 1195 }
1199 1196
1200 tmp = strdup(arg); 1197 if (arg[0] == '\0')
1201 if (tmp == NULL) 1198 tmp = NULL;
1202 return -ENOMEM; 1199 else {
1200 tmp = strdup(arg);
1201 if (tmp == NULL)
1202 return -ENOMEM;
1203 }
1203 1204
1204 if (file_spec) 1205 if (file_spec)
1205 pp->file = tmp; 1206 pp->file = tmp;
1206 else 1207 else {
1207 pp->function = tmp; 1208 pp->function = tmp;
1208 1209
1210 /*
1211 * Keep pp->function even if this is an absolute address,
1212 * so it can mark whether abs_address is valid.
1213 * This makes 'perf probe lib.bin 0x0' possible.
1214 *
1215 * Note that checking the length of tmp is not needed,
1216 * because when we access tmp[1] we know tmp[0] is '0',
1217 * so tmp[1] is always valid (though it could be '\0').
1218 */
1219 if (tmp && !strncmp(tmp, "0x", 2)) {
1220 pp->abs_address = strtoul(pp->function, &tmp, 0);
1221 if (*tmp != '\0') {
1222 semantic_error("Invalid absolute address.\n");
1223 return -EINVAL;
1224 }
1225 }
1226 }
1227
1209 /* Parse other options */ 1228 /* Parse other options */
1210 while (ptr) { 1229 while (ptr) {
1211 arg = ptr; 1230 arg = ptr;
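The net effect of the parsing above: an argument beginning with 0x is taken as an absolute address rather than a symbol, enabling invocations like these (addresses and binary illustrative):

	# perf probe -x /lib64/libc.so.6 0x83940
	# perf probe 0xffffffff811e6615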
@@ -1467,8 +1486,7 @@ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
1467} 1486}
1468 1487
1469/* Parse probe_events event into struct probe_point */ 1488/* Parse probe_events event into struct probe_point */
1470static int parse_probe_trace_command(const char *cmd, 1489int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
1471 struct probe_trace_event *tev)
1472{ 1490{
1473 struct probe_trace_point *tp = &tev->point; 1491 struct probe_trace_point *tp = &tev->point;
1474 char pr; 1492 char pr;
@@ -1523,9 +1541,31 @@ static int parse_probe_trace_command(const char *cmd,
1523 } else 1541 } else
1524 p = argv[1]; 1542 p = argv[1];
1525 fmt1_str = strtok_r(p, "+", &fmt); 1543 fmt1_str = strtok_r(p, "+", &fmt);
1526 if (fmt1_str[0] == '0') /* only the address started with 0x */ 1544 /* only the address started with 0x */
1527 tp->address = strtoul(fmt1_str, NULL, 0); 1545 if (fmt1_str[0] == '0') {
1528 else { 1546 /*
1547 * Fix a special case:
1548 * if address == 0, kernel reports something like:
1549 * p:probe_libc/abs_0 /lib/libc-2.18.so:0x (null) arg1=%ax
1550 * Newer kernels may fix that, but we want to
1551 * support old kernels too.
1552 */
1553 if (strcmp(fmt1_str, "0x") == 0) {
1554 if (!argv[2] || strcmp(argv[2], "(null)")) {
1555 ret = -EINVAL;
1556 goto out;
1557 }
1558 tp->address = 0;
1559
1560 free(argv[2]);
1561 for (i = 2; argv[i + 1] != NULL; i++)
1562 argv[i] = argv[i + 1];
1563
1564 argv[i] = NULL;
1565 argc -= 1;
1566 } else
1567 tp->address = strtoul(fmt1_str, NULL, 0);
1568 } else {
1529 /* Only the symbol-based probe has offset */ 1569 /* Only the symbol-based probe has offset */
1530 tp->symbol = strdup(fmt1_str); 1570 tp->symbol = strdup(fmt1_str);
1531 if (tp->symbol == NULL) { 1571 if (tp->symbol == NULL) {
@@ -1782,14 +1822,29 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
1782 if (len <= 0) 1822 if (len <= 0)
1783 goto error; 1823 goto error;
1784 1824
1785 /* Uprobes must have tp->address and tp->module */ 1825 /* Uprobes must have tp->module */
1786 if (tev->uprobes && (!tp->address || !tp->module)) 1826 if (tev->uprobes && !tp->module)
1787 goto error; 1827 goto error;
1828 /*
1829 * If tp->address == 0, then this point must be an
1830 * absolute-address uprobe.
1831 * try_to_find_absolute_address() should have set
1832 * tp->symbol to "0x0".
1833 */
1834 if (tev->uprobes && !tp->address) {
1835 if (!tp->symbol || strcmp(tp->symbol, "0x0"))
1836 goto error;
1837 }
1788 1838
1789 /* Use the tp->address for uprobes */ 1839 /* Use the tp->address for uprobes */
1790 if (tev->uprobes) 1840 if (tev->uprobes)
1791 ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx", 1841 ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx",
1792 tp->module, tp->address); 1842 tp->module, tp->address);
1843 else if (!strncmp(tp->symbol, "0x", 2))
1844 /* Absolute address. See try_to_find_absolute_address() */
1845 ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx",
1846 tp->module ?: "", tp->module ? ":" : "",
1847 tp->address);
1793 else 1848 else
1794 ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu", 1849 ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu",
1795 tp->module ?: "", tp->module ? ":" : "", 1850 tp->module ?: "", tp->module ? ":" : "",
@@ -1819,17 +1874,17 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
1819{ 1874{
1820 struct symbol *sym = NULL; 1875 struct symbol *sym = NULL;
1821 struct map *map; 1876 struct map *map;
1822 u64 addr; 1877 u64 addr = tp->address;
1823 int ret = -ENOENT; 1878 int ret = -ENOENT;
1824 1879
1825 if (!is_kprobe) { 1880 if (!is_kprobe) {
1826 map = dso__new_map(tp->module); 1881 map = dso__new_map(tp->module);
1827 if (!map) 1882 if (!map)
1828 goto out; 1883 goto out;
1829 addr = tp->address;
1830 sym = map__find_symbol(map, addr, NULL); 1884 sym = map__find_symbol(map, addr, NULL);
1831 } else { 1885 } else {
1832 addr = kernel_get_symbol_address_by_name(tp->symbol, true); 1886 if (tp->symbol)
1887 addr = kernel_get_symbol_address_by_name(tp->symbol, true);
1833 if (addr) { 1888 if (addr) {
1834 addr += tp->offset; 1889 addr += tp->offset;
1835 sym = __find_kernel_function(addr, &map); 1890 sym = __find_kernel_function(addr, &map);
@@ -1852,8 +1907,8 @@ out:
1852} 1907}
1853 1908
1854static int convert_to_perf_probe_point(struct probe_trace_point *tp, 1909static int convert_to_perf_probe_point(struct probe_trace_point *tp,
1855 struct perf_probe_point *pp, 1910 struct perf_probe_point *pp,
1856 bool is_kprobe) 1911 bool is_kprobe)
1857{ 1912{
1858 char buf[128]; 1913 char buf[128];
1859 int ret; 1914 int ret;
@@ -1870,7 +1925,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp,
1870 if (tp->symbol) { 1925 if (tp->symbol) {
1871 pp->function = strdup(tp->symbol); 1926 pp->function = strdup(tp->symbol);
1872 pp->offset = tp->offset; 1927 pp->offset = tp->offset;
1873 } else if (!tp->module && !is_kprobe) { 1928 } else {
1874 ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address); 1929 ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address);
1875 if (ret < 0) 1930 if (ret < 0)
1876 return ret; 1931 return ret;
@@ -1951,7 +2006,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev)
1951 memset(pev, 0, sizeof(*pev)); 2006 memset(pev, 0, sizeof(*pev));
1952} 2007}
1953 2008
1954static void clear_probe_trace_event(struct probe_trace_event *tev) 2009void clear_probe_trace_event(struct probe_trace_event *tev)
1955{ 2010{
1956 struct probe_trace_arg_ref *ref, *next; 2011 struct probe_trace_arg_ref *ref, *next;
1957 int i; 2012 int i;
@@ -1976,119 +2031,6 @@ static void clear_probe_trace_event(struct probe_trace_event *tev)
1976 memset(tev, 0, sizeof(*tev)); 2031 memset(tev, 0, sizeof(*tev));
1977} 2032}
1978 2033
1979static void print_open_warning(int err, bool is_kprobe)
1980{
1981 char sbuf[STRERR_BUFSIZE];
1982
1983 if (err == -ENOENT) {
1984 const char *config;
1985
1986 if (!is_kprobe)
1987 config = "CONFIG_UPROBE_EVENTS";
1988 else
1989 config = "CONFIG_KPROBE_EVENTS";
1990
1991 pr_warning("%cprobe_events file does not exist"
1992 " - please rebuild kernel with %s.\n",
1993 is_kprobe ? 'k' : 'u', config);
1994 } else if (err == -ENOTSUP)
1995 pr_warning("Tracefs or debugfs is not mounted.\n");
1996 else
1997 pr_warning("Failed to open %cprobe_events: %s\n",
1998 is_kprobe ? 'k' : 'u',
1999 strerror_r(-err, sbuf, sizeof(sbuf)));
2000}
2001
2002static void print_both_open_warning(int kerr, int uerr)
2003{
2004 /* Both kprobes and uprobes are disabled, warn it. */
2005 if (kerr == -ENOTSUP && uerr == -ENOTSUP)
2006 pr_warning("Tracefs or debugfs is not mounted.\n");
2007 else if (kerr == -ENOENT && uerr == -ENOENT)
2008 pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS "
2009 "or/and CONFIG_UPROBE_EVENTS.\n");
2010 else {
2011 char sbuf[STRERR_BUFSIZE];
2012 pr_warning("Failed to open kprobe events: %s.\n",
2013 strerror_r(-kerr, sbuf, sizeof(sbuf)));
2014 pr_warning("Failed to open uprobe events: %s.\n",
2015 strerror_r(-uerr, sbuf, sizeof(sbuf)));
2016 }
2017}
2018
2019static int open_probe_events(const char *trace_file, bool readwrite)
2020{
2021 char buf[PATH_MAX];
2022 const char *__debugfs;
2023 const char *tracing_dir = "";
2024 int ret;
2025
2026 __debugfs = tracefs_find_mountpoint();
2027 if (__debugfs == NULL) {
2028 tracing_dir = "tracing/";
2029
2030 __debugfs = debugfs_find_mountpoint();
2031 if (__debugfs == NULL)
2032 return -ENOTSUP;
2033 }
2034
2035 ret = e_snprintf(buf, PATH_MAX, "%s/%s%s",
2036 __debugfs, tracing_dir, trace_file);
2037 if (ret >= 0) {
2038 pr_debug("Opening %s write=%d\n", buf, readwrite);
2039 if (readwrite && !probe_event_dry_run)
2040 ret = open(buf, O_RDWR | O_APPEND, 0);
2041 else
2042 ret = open(buf, O_RDONLY, 0);
2043
2044 if (ret < 0)
2045 ret = -errno;
2046 }
2047 return ret;
2048}
2049
2050static int open_kprobe_events(bool readwrite)
2051{
2052 return open_probe_events("kprobe_events", readwrite);
2053}
2054
2055static int open_uprobe_events(bool readwrite)
2056{
2057 return open_probe_events("uprobe_events", readwrite);
2058}
2059
2060/* Get raw string list of current kprobe_events or uprobe_events */
2061static struct strlist *get_probe_trace_command_rawlist(int fd)
2062{
2063 int ret, idx;
2064 FILE *fp;
2065 char buf[MAX_CMDLEN];
2066 char *p;
2067 struct strlist *sl;
2068
2069 sl = strlist__new(true, NULL);
2070
2071 fp = fdopen(dup(fd), "r");
2072 while (!feof(fp)) {
2073 p = fgets(buf, MAX_CMDLEN, fp);
2074 if (!p)
2075 break;
2076
2077 idx = strlen(p) - 1;
2078 if (p[idx] == '\n')
2079 p[idx] = '\0';
2080 ret = strlist__add(sl, buf);
2081 if (ret < 0) {
2082 pr_debug("strlist__add failed (%d)\n", ret);
2083 strlist__delete(sl);
2084 return NULL;
2085 }
2086 }
2087 fclose(fp);
2088
2089 return sl;
2090}
2091
2092struct kprobe_blacklist_node { 2034struct kprobe_blacklist_node {
2093 struct list_head list; 2035 struct list_head list;
2094 unsigned long start; 2036 unsigned long start;
@@ -2284,7 +2226,7 @@ static int __show_perf_probe_events(int fd, bool is_kprobe,
2284 memset(&tev, 0, sizeof(tev)); 2226 memset(&tev, 0, sizeof(tev));
2285 memset(&pev, 0, sizeof(pev)); 2227 memset(&pev, 0, sizeof(pev));
2286 2228
2287 rawlist = get_probe_trace_command_rawlist(fd); 2229 rawlist = probe_file__get_rawlist(fd);
2288 if (!rawlist) 2230 if (!rawlist)
2289 return -ENOMEM; 2231 return -ENOMEM;
2290 2232
@@ -2325,89 +2267,20 @@ int show_perf_probe_events(struct strfilter *filter)
2325 if (ret < 0) 2267 if (ret < 0)
2326 return ret; 2268 return ret;
2327 2269
2328 kp_fd = open_kprobe_events(false); 2270 ret = probe_file__open_both(&kp_fd, &up_fd, 0);
2329 if (kp_fd >= 0) { 2271 if (ret < 0)
2330 ret = __show_perf_probe_events(kp_fd, true, filter); 2272 return ret;
2331 close(kp_fd);
2332 if (ret < 0)
2333 goto out;
2334 }
2335
2336 up_fd = open_uprobe_events(false);
2337 if (kp_fd < 0 && up_fd < 0) {
2338 print_both_open_warning(kp_fd, up_fd);
2339 ret = kp_fd;
2340 goto out;
2341 }
2342 2273
2343 if (up_fd >= 0) { 2274 if (kp_fd >= 0)
2275 ret = __show_perf_probe_events(kp_fd, true, filter);
2276 if (up_fd >= 0 && ret >= 0)
2344 ret = __show_perf_probe_events(up_fd, false, filter); 2277 ret = __show_perf_probe_events(up_fd, false, filter);
2278 if (kp_fd > 0)
2279 close(kp_fd);
2280 if (up_fd > 0)
2345 close(up_fd); 2281 close(up_fd);
2346 }
2347out:
2348 exit_symbol_maps(); 2282 exit_symbol_maps();
2349 return ret;
2350}
2351 2283
2352/* Get current perf-probe event names */
2353static struct strlist *get_probe_trace_event_names(int fd, bool include_group)
2354{
2355 char buf[128];
2356 struct strlist *sl, *rawlist;
2357 struct str_node *ent;
2358 struct probe_trace_event tev;
2359 int ret = 0;
2360
2361 memset(&tev, 0, sizeof(tev));
2362 rawlist = get_probe_trace_command_rawlist(fd);
2363 if (!rawlist)
2364 return NULL;
2365 sl = strlist__new(true, NULL);
2366 strlist__for_each(ent, rawlist) {
2367 ret = parse_probe_trace_command(ent->s, &tev);
2368 if (ret < 0)
2369 break;
2370 if (include_group) {
2371 ret = e_snprintf(buf, 128, "%s:%s", tev.group,
2372 tev.event);
2373 if (ret >= 0)
2374 ret = strlist__add(sl, buf);
2375 } else
2376 ret = strlist__add(sl, tev.event);
2377 clear_probe_trace_event(&tev);
2378 if (ret < 0)
2379 break;
2380 }
2381 strlist__delete(rawlist);
2382
2383 if (ret < 0) {
2384 strlist__delete(sl);
2385 return NULL;
2386 }
2387 return sl;
2388}
2389
2390static int write_probe_trace_event(int fd, struct probe_trace_event *tev)
2391{
2392 int ret = 0;
2393 char *buf = synthesize_probe_trace_command(tev);
2394 char sbuf[STRERR_BUFSIZE];
2395
2396 if (!buf) {
2397 pr_debug("Failed to synthesize probe trace event.\n");
2398 return -EINVAL;
2399 }
2400
2401 pr_debug("Writing event: %s\n", buf);
2402 if (!probe_event_dry_run) {
2403 ret = write(fd, buf, strlen(buf));
2404 if (ret <= 0) {
2405 ret = -errno;
2406 pr_warning("Failed to write event: %s\n",
2407 strerror_r(errno, sbuf, sizeof(sbuf)));
2408 }
2409 }
2410 free(buf);
2411 return ret; 2284 return ret;
2412} 2285}
2413 2286
@@ -2478,36 +2351,69 @@ out:
2478 free(buf); 2351 free(buf);
2479} 2352}
2480 2353
2354/* Set new name from original perf_probe_event and namelist */
2355static int probe_trace_event__set_name(struct probe_trace_event *tev,
2356 struct perf_probe_event *pev,
2357 struct strlist *namelist,
2358 bool allow_suffix)
2359{
2360 const char *event, *group;
2361 char buf[64];
2362 int ret;
2363
2364 if (pev->event)
2365 event = pev->event;
2366 else
2367 if (pev->point.function &&
2368 (strncmp(pev->point.function, "0x", 2) != 0) &&
2369 !strisglob(pev->point.function))
2370 event = pev->point.function;
2371 else
2372 event = tev->point.realname;
2373 if (pev->group)
2374 group = pev->group;
2375 else
2376 group = PERFPROBE_GROUP;
2377
2378 /* Get an unused new event name */
2379 ret = get_new_event_name(buf, 64, event,
2380 namelist, allow_suffix);
2381 if (ret < 0)
2382 return ret;
2383
2384 event = buf;
2385
2386 tev->event = strdup(event);
2387 tev->group = strdup(group);
2388 if (tev->event == NULL || tev->group == NULL)
2389 return -ENOMEM;
2390
2391 /* Add added event name to namelist */
2392 strlist__add(namelist, event);
2393 return 0;
2394}
2395
2481static int __add_probe_trace_events(struct perf_probe_event *pev, 2396static int __add_probe_trace_events(struct perf_probe_event *pev,
2482 struct probe_trace_event *tevs, 2397 struct probe_trace_event *tevs,
2483 int ntevs, bool allow_suffix) 2398 int ntevs, bool allow_suffix)
2484{ 2399{
2485 int i, fd, ret; 2400 int i, fd, ret;
2486 struct probe_trace_event *tev = NULL; 2401 struct probe_trace_event *tev = NULL;
2487 char buf[64];
2488 const char *event = NULL, *group = NULL; 2402 const char *event = NULL, *group = NULL;
2489 struct strlist *namelist; 2403 struct strlist *namelist;
2490 bool safename;
2491
2492 if (pev->uprobes)
2493 fd = open_uprobe_events(true);
2494 else
2495 fd = open_kprobe_events(true);
2496 2404
2497 if (fd < 0) { 2405 fd = probe_file__open(PF_FL_RW | (pev->uprobes ? PF_FL_UPROBE : 0));
2498 print_open_warning(fd, !pev->uprobes); 2406 if (fd < 0)
2499 return fd; 2407 return fd;
2500 }
2501 2408
2502 /* Get current event names */ 2409 /* Get current event names */
2503 namelist = get_probe_trace_event_names(fd, false); 2410 namelist = probe_file__get_namelist(fd);
2504 if (!namelist) { 2411 if (!namelist) {
2505 pr_debug("Failed to get current event list.\n"); 2412 pr_debug("Failed to get current event list.\n");
2506 ret = -ENOMEM; 2413 ret = -ENOMEM;
2507 goto close_out; 2414 goto close_out;
2508 } 2415 }
2509 2416
2510 safename = (pev->point.function && !strisglob(pev->point.function));
2511 ret = 0; 2417 ret = 0;
2512 pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); 2418 pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
2513 for (i = 0; i < ntevs; i++) { 2419 for (i = 0; i < ntevs; i++) {
@@ -2516,36 +2422,15 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
2516 if (!tev->point.symbol) 2422 if (!tev->point.symbol)
2517 continue; 2423 continue;
2518 2424
2519 if (pev->event) 2425 /* Set new name for tev (and update namelist) */
2520 event = pev->event; 2426 ret = probe_trace_event__set_name(tev, pev, namelist,
2521 else 2427 allow_suffix);
2522 if (safename)
2523 event = pev->point.function;
2524 else
2525 event = tev->point.realname;
2526 if (pev->group)
2527 group = pev->group;
2528 else
2529 group = PERFPROBE_GROUP;
2530
2531 /* Get an unused new event name */
2532 ret = get_new_event_name(buf, 64, event,
2533 namelist, allow_suffix);
2534 if (ret < 0) 2428 if (ret < 0)
2535 break; 2429 break;
2536 event = buf;
2537 2430
2538 tev->event = strdup(event); 2431 ret = probe_file__add_event(fd, tev);
2539 tev->group = strdup(group);
2540 if (tev->event == NULL || tev->group == NULL) {
2541 ret = -ENOMEM;
2542 break;
2543 }
2544 ret = write_probe_trace_event(fd, tev);
2545 if (ret < 0) 2432 if (ret < 0)
2546 break; 2433 break;
2547 /* Add added event name to namelist */
2548 strlist__add(namelist, event);
2549 2434
2550 /* We use tev's name for showing new events */ 2435 /* We use tev's name for showing new events */
2551 show_perf_probe_event(tev->group, tev->event, pev, 2436 show_perf_probe_event(tev->group, tev->event, pev,
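
The loop now delegates naming to probe_trace_event__set_name(), whose heavy lifting is get_new_event_name(): try the base name, then base_1, base_2, ... up to MAX_EVENT_INDEX until one is free. A simplified sketch of that search, with name_taken() standing in for the strlist lookup (a hypothetical helper for illustration; the real function also sanitizes the base name):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_EVENT_INDEX 1024

    /* Hypothetical stand-in for strlist__has_entry(namelist, name). */
    static const char *taken[] = { "vfs_read", "vfs_read_1" };

    static bool name_taken(const char *name)
    {
        size_t i;

        for (i = 0; i < sizeof(taken) / sizeof(taken[0]); i++)
            if (!strcmp(taken[i], name))
                return true;
        return false;
    }

    static int new_event_name(char *buf, size_t len, const char *base,
                              bool allow_suffix)
    {
        int i;

        snprintf(buf, len, "%s", base);
        if (!name_taken(buf))
            return 0;
        if (!allow_suffix)
            return -1;          /* name exists, suffixing not allowed */
        for (i = 1; i < MAX_EVENT_INDEX; i++) {
            snprintf(buf, len, "%s_%d", base, i);
            if (!name_taken(buf))
                return 0;
        }
        return -1;              /* all suffixes exhausted */
    }

    int main(void)
    {
        char buf[64];

        if (!new_event_name(buf, sizeof(buf), "vfs_read", true))
            printf("%s\n", buf);    /* prints vfs_read_2 */
        return 0;
    }
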
@@ -2748,6 +2633,98 @@ err_out:
2748 goto out; 2633 goto out;
2749} 2634}
2750 2635
2636static int try_to_find_absolute_address(struct perf_probe_event *pev,
2637 struct probe_trace_event **tevs)
2638{
2639 struct perf_probe_point *pp = &pev->point;
2640 struct probe_trace_event *tev;
2641 struct probe_trace_point *tp;
2642 int i, err;
2643
2644 if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2)))
2645 return -EINVAL;
2646 if (perf_probe_event_need_dwarf(pev))
2647 return -EINVAL;
2648
2649 /*
2650 * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at
2651 * absolute address.
2652 *
2653 * Only one tev can be generated by this.
2654 */
2655 *tevs = zalloc(sizeof(*tev));
2656 if (!*tevs)
2657 return -ENOMEM;
2658
2659 tev = *tevs;
2660 tp = &tev->point;
2661
2662 /*
2663 * Don't use tp->offset, use address directly, because
2664 * in synthesize_probe_trace_command() address cannot be
2665 * zero.
2666 */
2667 tp->address = pev->point.abs_address;
2668 tp->retprobe = pp->retprobe;
2669 tev->uprobes = pev->uprobes;
2670
2671 err = -ENOMEM;
2672 /*
2673 * Give it a '0x' leading symbol name.
2674 * In __add_probe_trace_events, a NULL symbol is interpreted as
2675 * invalid.
2676 */
2677 if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0)
2678 goto errout;
2679
2680 /* For kprobe, check range */
2681 if ((!tev->uprobes) &&
2682 (kprobe_warn_out_range(tev->point.symbol,
2683 tev->point.address))) {
2684 err = -EACCES;
2685 goto errout;
2686 }
2687
2688 if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0)
2689 goto errout;
2690
2691 if (pev->target) {
2692 tp->module = strdup(pev->target);
2693 if (!tp->module)
2694 goto errout;
2695 }
2696
2697 if (pev->group) {
2698 tev->group = strdup(pev->group);
2699 if (!tev->group)
2700 goto errout;
2701 }
2702
2703 if (pev->event) {
2704 tev->event = strdup(pev->event);
2705 if (!tev->event)
2706 goto errout;
2707 }
2708
2709 tev->nargs = pev->nargs;
2710 tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
2711 if (!tev->args) {
2712 err = -ENOMEM;
2713 goto errout;
2714 }
2715 for (i = 0; i < tev->nargs; i++)
2716 copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]);
2717
2718 return 1;
2719
2720errout:
2721 if (*tevs) {
2722 clear_probe_trace_events(*tevs, 1);
2723 *tevs = NULL;
2724 }
2725 return err;
2726}
2727
2751bool __weak arch__prefers_symtab(void) { return false; } 2728bool __weak arch__prefers_symtab(void) { return false; }
2752 2729
2753static int convert_to_probe_trace_events(struct perf_probe_event *pev, 2730static int convert_to_probe_trace_events(struct perf_probe_event *pev,
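
try_to_find_absolute_address() only fires when the function spec begins with "0x"; the parsed value reaches it via pp->abs_address. A hedged sketch of the kind of parsing that feeds that field, assuming a strtoul-based implementation (the actual parsing in perf's command-line handling is more involved):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Returns 0 and sets *addr if spec looks like "0x<hex>". */
    static int parse_abs_address(const char *spec, unsigned long *addr)
    {
        char *end;

        if (strncmp(spec, "0x", 2) != 0)
            return -EINVAL;
        errno = 0;
        *addr = strtoul(spec, &end, 16);
        if (errno || *end != '\0')
            return -EINVAL;
        return 0;
    }

    int main(void)
    {
        unsigned long addr;

        if (!parse_abs_address("0xabcd", &addr))
            printf("abs_address = 0x%lx\n", addr);
        return 0;
    }
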
@@ -2764,6 +2741,10 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
2764 } 2741 }
2765 } 2742 }
2766 2743
2744 ret = try_to_find_absolute_address(pev, tevs);
2745 if (ret > 0)
2746 return ret;
2747
2767 if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) { 2748 if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
2768 ret = find_probe_trace_events_from_map(pev, tevs); 2749 ret = find_probe_trace_events_from_map(pev, tevs);
2769 if (ret > 0) 2750 if (ret > 0)
@@ -2838,68 +2819,9 @@ end:
2838 return ret; 2819 return ret;
2839} 2820}
2840 2821
2841static int __del_trace_probe_event(int fd, struct str_node *ent)
2842{
2843 char *p;
2844 char buf[128];
2845 int ret;
2846
2847 /* Convert from perf-probe event to trace-probe event */
2848 ret = e_snprintf(buf, 128, "-:%s", ent->s);
2849 if (ret < 0)
2850 goto error;
2851
2852 p = strchr(buf + 2, ':');
2853 if (!p) {
2854 pr_debug("Internal error: %s should have ':' but not.\n",
2855 ent->s);
2856 ret = -ENOTSUP;
2857 goto error;
2858 }
2859 *p = '/';
2860
2861 pr_debug("Writing event: %s\n", buf);
2862 ret = write(fd, buf, strlen(buf));
2863 if (ret < 0) {
2864 ret = -errno;
2865 goto error;
2866 }
2867
2868 pr_info("Removed event: %s\n", ent->s);
2869 return 0;
2870error:
2871 pr_warning("Failed to delete event: %s\n",
2872 strerror_r(-ret, buf, sizeof(buf)));
2873 return ret;
2874}
2875
2876static int del_trace_probe_events(int fd, struct strfilter *filter,
2877 struct strlist *namelist)
2878{
2879 struct str_node *ent;
2880 const char *p;
2881 int ret = -ENOENT;
2882
2883 if (!namelist)
2884 return -ENOENT;
2885
2886 strlist__for_each(ent, namelist) {
2887 p = strchr(ent->s, ':');
2888 if ((p && strfilter__compare(filter, p + 1)) ||
2889 strfilter__compare(filter, ent->s)) {
2890 ret = __del_trace_probe_event(fd, ent);
2891 if (ret < 0)
2892 break;
2893 }
2894 }
2895
2896 return ret;
2897}
2898
2899int del_perf_probe_events(struct strfilter *filter) 2822int del_perf_probe_events(struct strfilter *filter)
2900{ 2823{
2901 int ret, ret2, ufd = -1, kfd = -1; 2824 int ret, ret2, ufd = -1, kfd = -1;
2902 struct strlist *namelist = NULL, *unamelist = NULL;
2903 char *str = strfilter__string(filter); 2825 char *str = strfilter__string(filter);
2904 2826
2905 if (!str) 2827 if (!str)
@@ -2908,25 +2830,15 @@ int del_perf_probe_events(struct strfilter *filter)
2908 pr_debug("Delete filter: \'%s\'\n", str); 2830 pr_debug("Delete filter: \'%s\'\n", str);
2909 2831
2910 /* Get current event names */ 2832 /* Get current event names */
2911 kfd = open_kprobe_events(true); 2833 ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
2912 if (kfd >= 0) 2834 if (ret < 0)
2913 namelist = get_probe_trace_event_names(kfd, true); 2835 goto out;
2914
2915 ufd = open_uprobe_events(true);
2916 if (ufd >= 0)
2917 unamelist = get_probe_trace_event_names(ufd, true);
2918
2919 if (kfd < 0 && ufd < 0) {
2920 print_both_open_warning(kfd, ufd);
2921 ret = kfd;
2922 goto error;
2923 }
2924 2836
2925 ret = del_trace_probe_events(kfd, filter, namelist); 2837 ret = probe_file__del_events(kfd, filter);
2926 if (ret < 0 && ret != -ENOENT) 2838 if (ret < 0 && ret != -ENOENT)
2927 goto error; 2839 goto error;
2928 2840
2929 ret2 = del_trace_probe_events(ufd, filter, unamelist); 2841 ret2 = probe_file__del_events(ufd, filter);
2930 if (ret2 < 0 && ret2 != -ENOENT) { 2842 if (ret2 < 0 && ret2 != -ENOENT) {
2931 ret = ret2; 2843 ret = ret2;
2932 goto error; 2844 goto error;
@@ -2937,15 +2849,11 @@ int del_perf_probe_events(struct strfilter *filter)
2937 ret = 0; 2849 ret = 0;
2938 2850
2939error: 2851error:
2940 if (kfd >= 0) { 2852 if (kfd >= 0)
2941 strlist__delete(namelist);
2942 close(kfd); 2853 close(kfd);
2943 } 2854 if (ufd >= 0)
2944
2945 if (ufd >= 0) {
2946 strlist__delete(unamelist);
2947 close(ufd); 2855 close(ufd);
2948 } 2856out:
2949 free(str); 2857 free(str);
2950 2858
2951 return ret; 2859 return ret;
@@ -3007,3 +2915,22 @@ end:
3007 return ret; 2915 return ret;
3008} 2916}
3009 2917
2918int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
2919 struct perf_probe_arg *pvar)
2920{
2921 tvar->value = strdup(pvar->var);
2922 if (tvar->value == NULL)
2923 return -ENOMEM;
2924 if (pvar->type) {
2925 tvar->type = strdup(pvar->type);
2926 if (tvar->type == NULL)
2927 return -ENOMEM;
2928 }
2929 if (pvar->name) {
2930 tvar->name = strdup(pvar->name);
2931 if (tvar->name == NULL)
2932 return -ENOMEM;
2933 } else
2934 tvar->name = NULL;
2935 return 0;
2936}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 31db6ee7db54..6e7ec68a4aa8 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -59,6 +59,7 @@ struct perf_probe_point {
59 bool retprobe; /* Return probe flag */ 59 bool retprobe; /* Return probe flag */
60 char *lazy_line; /* Lazy matching pattern */ 60 char *lazy_line; /* Lazy matching pattern */
61 unsigned long offset; /* Offset from function entry */ 61 unsigned long offset; /* Offset from function entry */
62 unsigned long abs_address; /* Absolute address of the point */
62}; 63};
63 64
64/* Perf probe probing argument field chain */ 65/* Perf probe probing argument field chain */
@@ -106,9 +107,13 @@ struct variable_list {
106 struct strlist *vars; /* Available variables */ 107 struct strlist *vars; /* Available variables */
107}; 108};
108 109
110struct map;
111
109/* Command string to events */ 112/* Command string to events */
110extern int parse_perf_probe_command(const char *cmd, 113extern int parse_perf_probe_command(const char *cmd,
111 struct perf_probe_event *pev); 114 struct perf_probe_event *pev);
115extern int parse_probe_trace_command(const char *cmd,
116 struct probe_trace_event *tev);
112 117
113/* Events to command string */ 118/* Events to command string */
114extern char *synthesize_perf_probe_command(struct perf_probe_event *pev); 119extern char *synthesize_perf_probe_command(struct perf_probe_event *pev);
@@ -121,6 +126,7 @@ extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
121 126
122/* Release event contents */ 127/* Release event contents */
123extern void clear_perf_probe_event(struct perf_probe_event *pev); 128extern void clear_perf_probe_event(struct perf_probe_event *pev);
129extern void clear_probe_trace_event(struct probe_trace_event *tev);
124 130
125/* Command string to line-range */ 131/* Command string to line-range */
126extern int parse_line_range_desc(const char *cmd, struct line_range *lr); 132extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
@@ -144,7 +150,14 @@ bool arch__prefers_symtab(void);
144void arch__fix_tev_from_maps(struct perf_probe_event *pev, 150void arch__fix_tev_from_maps(struct perf_probe_event *pev,
145 struct probe_trace_event *tev, struct map *map); 151 struct probe_trace_event *tev, struct map *map);
146 152
153/* If there is no space to write, returns -E2BIG. */
154int e_snprintf(char *str, size_t size, const char *format, ...)
155 __attribute__((format(printf, 3, 4)));
156
147/* Maximum index number of event-name postfix */ 157/* Maximum index number of event-name postfix */
148#define MAX_EVENT_INDEX 1024 158#define MAX_EVENT_INDEX 1024
149 159
160int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
161 struct perf_probe_arg *pvar);
162
150#endif /*_PROBE_EVENT_H */ 163#endif /*_PROBE_EVENT_H */
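
The new e_snprintf() declaration documents its one behavioural difference from snprintf(): truncation becomes a hard error. A minimal equivalent to illustrate the contract (an illustration, not the perf implementation verbatim):

    #include <errno.h>
    #include <stdarg.h>
    #include <stdio.h>

    static int e_snprintf_like(char *str, size_t size, const char *fmt, ...)
    {
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = vsnprintf(str, size, fmt, ap);
        va_end(ap);
        if (ret >= (int)size)
            ret = -E2BIG;       /* output did not fit */
        return ret;
    }

    int main(void)
    {
        char buf[8];

        /* Prints a negative value (-E2BIG): the string is truncated. */
        printf("%d\n", e_snprintf_like(buf, sizeof(buf), "%s",
                                       "too long a string"));
        return 0;
    }
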
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
new file mode 100644
index 000000000000..bbb243717ec8
--- /dev/null
+++ b/tools/perf/util/probe-file.c
@@ -0,0 +1,301 @@
1/*
2 * probe-file.c : operate ftrace k/uprobe events files
3 *
4 * Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 */
17#include "util.h"
18#include "event.h"
19#include "strlist.h"
20#include "debug.h"
21#include "cache.h"
22#include "color.h"
23#include "symbol.h"
24#include "thread.h"
25#include <api/fs/debugfs.h>
26#include <api/fs/tracefs.h>
27#include "probe-event.h"
28#include "probe-file.h"
29#include "session.h"
30
31#define MAX_CMDLEN 256
32
33static void print_open_warning(int err, bool uprobe)
34{
35 char sbuf[STRERR_BUFSIZE];
36
37 if (err == -ENOENT) {
38 const char *config;
39
40 if (uprobe)
41 config = "CONFIG_UPROBE_EVENTS";
42 else
43 config = "CONFIG_KPROBE_EVENTS";
44
45 pr_warning("%cprobe_events file does not exist"
46 " - please rebuild kernel with %s.\n",
47 uprobe ? 'u' : 'k', config);
48 } else if (err == -ENOTSUP)
49 pr_warning("Tracefs or debugfs is not mounted.\n");
50 else
51 pr_warning("Failed to open %cprobe_events: %s\n",
52 uprobe ? 'u' : 'k',
53 strerror_r(-err, sbuf, sizeof(sbuf)));
54}
55
56static void print_both_open_warning(int kerr, int uerr)
57{
58 /* Both kprobes and uprobes are disabled, warn about it. */
59 if (kerr == -ENOTSUP && uerr == -ENOTSUP)
60 pr_warning("Tracefs or debugfs is not mounted.\n");
61 else if (kerr == -ENOENT && uerr == -ENOENT)
62 pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS "
63 "or/and CONFIG_UPROBE_EVENTS.\n");
64 else {
65 char sbuf[STRERR_BUFSIZE];
66 pr_warning("Failed to open kprobe events: %s.\n",
67 strerror_r(-kerr, sbuf, sizeof(sbuf)));
68 pr_warning("Failed to open uprobe events: %s.\n",
69 strerror_r(-uerr, sbuf, sizeof(sbuf)));
70 }
71}
72
73static int open_probe_events(const char *trace_file, bool readwrite)
74{
75 char buf[PATH_MAX];
76 const char *__debugfs;
77 const char *tracing_dir = "";
78 int ret;
79
80 __debugfs = tracefs_find_mountpoint();
81 if (__debugfs == NULL) {
82 tracing_dir = "tracing/";
83
84 __debugfs = debugfs_find_mountpoint();
85 if (__debugfs == NULL)
86 return -ENOTSUP;
87 }
88
89 ret = e_snprintf(buf, PATH_MAX, "%s/%s%s",
90 __debugfs, tracing_dir, trace_file);
91 if (ret >= 0) {
92 pr_debug("Opening %s write=%d\n", buf, readwrite);
93 if (readwrite && !probe_event_dry_run)
94 ret = open(buf, O_RDWR | O_APPEND, 0);
95 else
96 ret = open(buf, O_RDONLY, 0);
97
98 if (ret < 0)
99 ret = -errno;
100 }
101 return ret;
102}
103
104static int open_kprobe_events(bool readwrite)
105{
106 return open_probe_events("kprobe_events", readwrite);
107}
108
109static int open_uprobe_events(bool readwrite)
110{
111 return open_probe_events("uprobe_events", readwrite);
112}
113
114int probe_file__open(int flag)
115{
116 int fd;
117
118 if (flag & PF_FL_UPROBE)
119 fd = open_uprobe_events(flag & PF_FL_RW);
120 else
121 fd = open_kprobe_events(flag & PF_FL_RW);
122 if (fd < 0)
123 print_open_warning(fd, flag & PF_FL_UPROBE);
124
125 return fd;
126}
127
128int probe_file__open_both(int *kfd, int *ufd, int flag)
129{
130 if (!kfd || !ufd)
131 return -EINVAL;
132
133 *kfd = open_kprobe_events(flag & PF_FL_RW);
134 *ufd = open_uprobe_events(flag & PF_FL_RW);
135 if (*kfd < 0 && *ufd < 0) {
136 print_both_open_warning(*kfd, *ufd);
137 return *kfd;
138 }
139
140 return 0;
141}
142
143/* Get raw string list of current kprobe_events or uprobe_events */
144struct strlist *probe_file__get_rawlist(int fd)
145{
146 int ret, idx;
147 FILE *fp;
148 char buf[MAX_CMDLEN];
149 char *p;
150 struct strlist *sl;
151
152 sl = strlist__new(NULL, NULL);
153
154 fp = fdopen(dup(fd), "r");
155 while (!feof(fp)) {
156 p = fgets(buf, MAX_CMDLEN, fp);
157 if (!p)
158 break;
159
160 idx = strlen(p) - 1;
161 if (p[idx] == '\n')
162 p[idx] = '\0';
163 ret = strlist__add(sl, buf);
164 if (ret < 0) {
165 pr_debug("strlist__add failed (%d)\n", ret);
166 strlist__delete(sl);
167 return NULL;
168 }
169 }
170 fclose(fp);
171
172 return sl;
173}
174
175static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
176{
177 char buf[128];
178 struct strlist *sl, *rawlist;
179 struct str_node *ent;
180 struct probe_trace_event tev;
181 int ret = 0;
182
183 memset(&tev, 0, sizeof(tev));
184 rawlist = probe_file__get_rawlist(fd);
185 if (!rawlist)
186 return NULL;
187 sl = strlist__new(NULL, NULL);
188 strlist__for_each(ent, rawlist) {
189 ret = parse_probe_trace_command(ent->s, &tev);
190 if (ret < 0)
191 break;
192 if (include_group) {
193 ret = e_snprintf(buf, 128, "%s:%s", tev.group,
194 tev.event);
195 if (ret >= 0)
196 ret = strlist__add(sl, buf);
197 } else
198 ret = strlist__add(sl, tev.event);
199 clear_probe_trace_event(&tev);
200 if (ret < 0)
201 break;
202 }
203 strlist__delete(rawlist);
204
205 if (ret < 0) {
206 strlist__delete(sl);
207 return NULL;
208 }
209 return sl;
210}
211
212/* Get current perf-probe event names */
213struct strlist *probe_file__get_namelist(int fd)
214{
215 return __probe_file__get_namelist(fd, false);
216}
217
218int probe_file__add_event(int fd, struct probe_trace_event *tev)
219{
220 int ret = 0;
221 char *buf = synthesize_probe_trace_command(tev);
222 char sbuf[STRERR_BUFSIZE];
223
224 if (!buf) {
225 pr_debug("Failed to synthesize probe trace event.\n");
226 return -EINVAL;
227 }
228
229 pr_debug("Writing event: %s\n", buf);
230 if (!probe_event_dry_run) {
231 ret = write(fd, buf, strlen(buf));
232 if (ret <= 0) {
233 ret = -errno;
234 pr_warning("Failed to write event: %s\n",
235 strerror_r(errno, sbuf, sizeof(sbuf)));
236 }
237 }
238 free(buf);
239
240 return ret;
241}
242
243static int __del_trace_probe_event(int fd, struct str_node *ent)
244{
245 char *p;
246 char buf[128];
247 int ret;
248
249 /* Convert from perf-probe event to trace-probe event */
250 ret = e_snprintf(buf, 128, "-:%s", ent->s);
251 if (ret < 0)
252 goto error;
253
254 p = strchr(buf + 2, ':');
255 if (!p) {
256 pr_debug("Internal error: %s should have ':' but not.\n",
257 ent->s);
258 ret = -ENOTSUP;
259 goto error;
260 }
261 *p = '/';
262
263 pr_debug("Writing event: %s\n", buf);
264 ret = write(fd, buf, strlen(buf));
265 if (ret < 0) {
266 ret = -errno;
267 goto error;
268 }
269
270 pr_info("Removed event: %s\n", ent->s);
271 return 0;
272error:
273 pr_warning("Failed to delete event: %s\n",
274 strerror_r(-ret, buf, sizeof(buf)));
275 return ret;
276}
277
278int probe_file__del_events(int fd, struct strfilter *filter)
279{
280 struct strlist *namelist;
281 struct str_node *ent;
282 const char *p;
283 int ret = -ENOENT;
284
285 namelist = __probe_file__get_namelist(fd, true);
286 if (!namelist)
287 return -ENOENT;
288
289 strlist__for_each(ent, namelist) {
290 p = strchr(ent->s, ':');
291 if ((p && strfilter__compare(filter, p + 1)) ||
292 strfilter__compare(filter, ent->s)) {
293 ret = __del_trace_probe_event(fd, ent);
294 if (ret < 0)
295 break;
296 }
297 }
298 strlist__delete(namelist);
299
300 return ret;
301}
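
Taken together, the new file boils the probe-events plumbing down to a small fd-based API. A sketch of how a caller might drive it for deletion, loosely mirroring the reworked del_perf_probe_events() (perf-internal headers; this only builds inside the perf tree):

    #include <errno.h>
    #include <unistd.h>
    #include "probe-file.h"
    #include "strfilter.h"

    static int delete_matching_probes(const char *pattern)
    {
        const char *errp;
        struct strfilter *filter = strfilter__new(pattern, &errp);
        int kfd = -1, ufd = -1, ret, ret2;

        if (!filter)
            return -EINVAL;

        ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
        if (ret < 0)
            goto out;

        ret = -ENOENT;
        if (kfd >= 0)
            ret = probe_file__del_events(kfd, filter);  /* kprobes */
        if (ufd >= 0) {
            ret2 = probe_file__del_events(ufd, filter); /* uprobes */
            if (ret < 0)
                ret = ret2;
        }
        if (kfd >= 0)
            close(kfd);
        if (ufd >= 0)
            close(ufd);
    out:
        strfilter__delete(filter);
        return ret;
    }
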
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
new file mode 100644
index 000000000000..ada94a242a17
--- /dev/null
+++ b/tools/perf/util/probe-file.h
@@ -0,0 +1,18 @@
1#ifndef __PROBE_FILE_H
2#define __PROBE_FILE_H
3
4#include "strlist.h"
5#include "strfilter.h"
6#include "probe-event.h"
7
8#define PF_FL_UPROBE 1
9#define PF_FL_RW 2
10
11int probe_file__open(int flag);
12int probe_file__open_both(int *kfd, int *ufd, int flag);
13struct strlist *probe_file__get_namelist(int fd);
14struct strlist *probe_file__get_rawlist(int fd);
15int probe_file__add_event(int fd, struct probe_trace_event *tev);
16int probe_file__del_events(int fd, struct strfilter *filter);
17
18#endif
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 2da65a710893..29c43c0680a8 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -553,24 +553,9 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
553 char buf[32], *ptr; 553 char buf[32], *ptr;
554 int ret = 0; 554 int ret = 0;
555 555
556 if (!is_c_varname(pf->pvar->var)) { 556 /* Copy raw parameters */
557 /* Copy raw parameters */ 557 if (!is_c_varname(pf->pvar->var))
558 pf->tvar->value = strdup(pf->pvar->var); 558 return copy_to_probe_trace_arg(pf->tvar, pf->pvar);
559 if (pf->tvar->value == NULL)
560 return -ENOMEM;
561 if (pf->pvar->type) {
562 pf->tvar->type = strdup(pf->pvar->type);
563 if (pf->tvar->type == NULL)
564 return -ENOMEM;
565 }
566 if (pf->pvar->name) {
567 pf->tvar->name = strdup(pf->pvar->name);
568 if (pf->tvar->name == NULL)
569 return -ENOMEM;
570 } else
571 pf->tvar->name = NULL;
572 return 0;
573 }
574 559
575 if (pf->pvar->name) 560 if (pf->pvar->name)
576 pf->tvar->name = strdup(pf->pvar->name); 561 pf->tvar->name = strdup(pf->pvar->name);
@@ -1355,7 +1340,7 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
1355 vl->point.offset); 1340 vl->point.offset);
1356 1341
1357 /* Find local variables */ 1342 /* Find local variables */
1358 vl->vars = strlist__new(true, NULL); 1343 vl->vars = strlist__new(NULL, NULL);
1359 if (vl->vars == NULL) 1344 if (vl->vars == NULL)
1360 return -ENOMEM; 1345 return -ENOMEM;
1361 af->child = true; 1346 af->child = true;
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 0766d98c5da5..51be28b1bca2 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -16,7 +16,7 @@ util/util.c
16util/xyarray.c 16util/xyarray.c
17util/cgroup.c 17util/cgroup.c
18util/rblist.c 18util/rblist.c
19util/stat.c 19util/counts.c
20util/strlist.c 20util/strlist.c
21util/trace-event.c 21util/trace-event.c
22../lib/rbtree.c 22../lib/rbtree.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 626422eda727..6324fe6b161e 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -941,76 +941,84 @@ static int pyrf_evlist__setup_types(void)
941 return PyType_Ready(&pyrf_evlist__type); 941 return PyType_Ready(&pyrf_evlist__type);
942} 942}
943 943
944#define PERF_CONST(name) { #name, PERF_##name }
945
944static struct { 946static struct {
945 const char *name; 947 const char *name;
946 int value; 948 int value;
947} perf__constants[] = { 949} perf__constants[] = {
948 { "TYPE_HARDWARE", PERF_TYPE_HARDWARE }, 950 PERF_CONST(TYPE_HARDWARE),
949 { "TYPE_SOFTWARE", PERF_TYPE_SOFTWARE }, 951 PERF_CONST(TYPE_SOFTWARE),
950 { "TYPE_TRACEPOINT", PERF_TYPE_TRACEPOINT }, 952 PERF_CONST(TYPE_TRACEPOINT),
951 { "TYPE_HW_CACHE", PERF_TYPE_HW_CACHE }, 953 PERF_CONST(TYPE_HW_CACHE),
952 { "TYPE_RAW", PERF_TYPE_RAW }, 954 PERF_CONST(TYPE_RAW),
953 { "TYPE_BREAKPOINT", PERF_TYPE_BREAKPOINT }, 955 PERF_CONST(TYPE_BREAKPOINT),
954 956
955 { "COUNT_HW_CPU_CYCLES", PERF_COUNT_HW_CPU_CYCLES }, 957 PERF_CONST(COUNT_HW_CPU_CYCLES),
956 { "COUNT_HW_INSTRUCTIONS", PERF_COUNT_HW_INSTRUCTIONS }, 958 PERF_CONST(COUNT_HW_INSTRUCTIONS),
957 { "COUNT_HW_CACHE_REFERENCES", PERF_COUNT_HW_CACHE_REFERENCES }, 959 PERF_CONST(COUNT_HW_CACHE_REFERENCES),
958 { "COUNT_HW_CACHE_MISSES", PERF_COUNT_HW_CACHE_MISSES }, 960 PERF_CONST(COUNT_HW_CACHE_MISSES),
959 { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 961 PERF_CONST(COUNT_HW_BRANCH_INSTRUCTIONS),
960 { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES }, 962 PERF_CONST(COUNT_HW_BRANCH_MISSES),
961 { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES }, 963 PERF_CONST(COUNT_HW_BUS_CYCLES),
962 { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D }, 964 PERF_CONST(COUNT_HW_CACHE_L1D),
963 { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I }, 965 PERF_CONST(COUNT_HW_CACHE_L1I),
964 { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL }, 966 PERF_CONST(COUNT_HW_CACHE_LL),
965 { "COUNT_HW_CACHE_DTLB", PERF_COUNT_HW_CACHE_DTLB }, 967 PERF_CONST(COUNT_HW_CACHE_DTLB),
966 { "COUNT_HW_CACHE_ITLB", PERF_COUNT_HW_CACHE_ITLB }, 968 PERF_CONST(COUNT_HW_CACHE_ITLB),
967 { "COUNT_HW_CACHE_BPU", PERF_COUNT_HW_CACHE_BPU }, 969 PERF_CONST(COUNT_HW_CACHE_BPU),
968 { "COUNT_HW_CACHE_OP_READ", PERF_COUNT_HW_CACHE_OP_READ }, 970 PERF_CONST(COUNT_HW_CACHE_OP_READ),
969 { "COUNT_HW_CACHE_OP_WRITE", PERF_COUNT_HW_CACHE_OP_WRITE }, 971 PERF_CONST(COUNT_HW_CACHE_OP_WRITE),
970 { "COUNT_HW_CACHE_OP_PREFETCH", PERF_COUNT_HW_CACHE_OP_PREFETCH }, 972 PERF_CONST(COUNT_HW_CACHE_OP_PREFETCH),
971 { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS }, 973 PERF_CONST(COUNT_HW_CACHE_RESULT_ACCESS),
972 { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS }, 974 PERF_CONST(COUNT_HW_CACHE_RESULT_MISS),
973 975
974 { "COUNT_HW_STALLED_CYCLES_FRONTEND", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 976 PERF_CONST(COUNT_HW_STALLED_CYCLES_FRONTEND),
975 { "COUNT_HW_STALLED_CYCLES_BACKEND", PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 977 PERF_CONST(COUNT_HW_STALLED_CYCLES_BACKEND),
976 978
977 { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK }, 979 PERF_CONST(COUNT_SW_CPU_CLOCK),
978 { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK }, 980 PERF_CONST(COUNT_SW_TASK_CLOCK),
979 { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS }, 981 PERF_CONST(COUNT_SW_PAGE_FAULTS),
980 { "COUNT_SW_CONTEXT_SWITCHES", PERF_COUNT_SW_CONTEXT_SWITCHES }, 982 PERF_CONST(COUNT_SW_CONTEXT_SWITCHES),
981 { "COUNT_SW_CPU_MIGRATIONS", PERF_COUNT_SW_CPU_MIGRATIONS }, 983 PERF_CONST(COUNT_SW_CPU_MIGRATIONS),
982 { "COUNT_SW_PAGE_FAULTS_MIN", PERF_COUNT_SW_PAGE_FAULTS_MIN }, 984 PERF_CONST(COUNT_SW_PAGE_FAULTS_MIN),
983 { "COUNT_SW_PAGE_FAULTS_MAJ", PERF_COUNT_SW_PAGE_FAULTS_MAJ }, 985 PERF_CONST(COUNT_SW_PAGE_FAULTS_MAJ),
984 { "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS }, 986 PERF_CONST(COUNT_SW_ALIGNMENT_FAULTS),
985 { "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS }, 987 PERF_CONST(COUNT_SW_EMULATION_FAULTS),
986 { "COUNT_SW_DUMMY", PERF_COUNT_SW_DUMMY }, 988 PERF_CONST(COUNT_SW_DUMMY),
987 989
988 { "SAMPLE_IP", PERF_SAMPLE_IP }, 990 PERF_CONST(SAMPLE_IP),
989 { "SAMPLE_TID", PERF_SAMPLE_TID }, 991 PERF_CONST(SAMPLE_TID),
990 { "SAMPLE_TIME", PERF_SAMPLE_TIME }, 992 PERF_CONST(SAMPLE_TIME),
991 { "SAMPLE_ADDR", PERF_SAMPLE_ADDR }, 993 PERF_CONST(SAMPLE_ADDR),
992 { "SAMPLE_READ", PERF_SAMPLE_READ }, 994 PERF_CONST(SAMPLE_READ),
993 { "SAMPLE_CALLCHAIN", PERF_SAMPLE_CALLCHAIN }, 995 PERF_CONST(SAMPLE_CALLCHAIN),
994 { "SAMPLE_ID", PERF_SAMPLE_ID }, 996 PERF_CONST(SAMPLE_ID),
995 { "SAMPLE_CPU", PERF_SAMPLE_CPU }, 997 PERF_CONST(SAMPLE_CPU),
996 { "SAMPLE_PERIOD", PERF_SAMPLE_PERIOD }, 998 PERF_CONST(SAMPLE_PERIOD),
997 { "SAMPLE_STREAM_ID", PERF_SAMPLE_STREAM_ID }, 999 PERF_CONST(SAMPLE_STREAM_ID),
998 { "SAMPLE_RAW", PERF_SAMPLE_RAW }, 1000 PERF_CONST(SAMPLE_RAW),
999 1001
1000 { "FORMAT_TOTAL_TIME_ENABLED", PERF_FORMAT_TOTAL_TIME_ENABLED }, 1002 PERF_CONST(FORMAT_TOTAL_TIME_ENABLED),
1001 { "FORMAT_TOTAL_TIME_RUNNING", PERF_FORMAT_TOTAL_TIME_RUNNING }, 1003 PERF_CONST(FORMAT_TOTAL_TIME_RUNNING),
1002 { "FORMAT_ID", PERF_FORMAT_ID }, 1004 PERF_CONST(FORMAT_ID),
1003 { "FORMAT_GROUP", PERF_FORMAT_GROUP }, 1005 PERF_CONST(FORMAT_GROUP),
1004 1006
1005 { "RECORD_MMAP", PERF_RECORD_MMAP }, 1007 PERF_CONST(RECORD_MMAP),
1006 { "RECORD_LOST", PERF_RECORD_LOST }, 1008 PERF_CONST(RECORD_LOST),
1007 { "RECORD_COMM", PERF_RECORD_COMM }, 1009 PERF_CONST(RECORD_COMM),
1008 { "RECORD_EXIT", PERF_RECORD_EXIT }, 1010 PERF_CONST(RECORD_EXIT),
1009 { "RECORD_THROTTLE", PERF_RECORD_THROTTLE }, 1011 PERF_CONST(RECORD_THROTTLE),
1010 { "RECORD_UNTHROTTLE", PERF_RECORD_UNTHROTTLE }, 1012 PERF_CONST(RECORD_UNTHROTTLE),
1011 { "RECORD_FORK", PERF_RECORD_FORK }, 1013 PERF_CONST(RECORD_FORK),
1012 { "RECORD_READ", PERF_RECORD_READ }, 1014 PERF_CONST(RECORD_READ),
1013 { "RECORD_SAMPLE", PERF_RECORD_SAMPLE }, 1015 PERF_CONST(RECORD_SAMPLE),
1016 PERF_CONST(RECORD_MMAP2),
1017 PERF_CONST(RECORD_AUX),
1018 PERF_CONST(RECORD_ITRACE_START),
1019 PERF_CONST(RECORD_LOST_SAMPLES),
1020 PERF_CONST(RECORD_SWITCH),
1021 PERF_CONST(RECORD_SWITCH_CPU_WIDE),
1014 { .name = NULL, }, 1022 { .name = NULL, },
1015}; 1023};
1016 1024
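
PERF_CONST() is the usual stringize-and-paste idiom: #name turns the macro argument into a string, while PERF_##name pastes the token to obtain the enumerator's value, so each constant is spelled once. The same pattern in a self-contained form (generic demo, not perf code):

    #include <stdio.h>

    enum { DEMO_FOO = 1, DEMO_BAR = 2 };

    /* #name stringizes, DEMO_##name token-pastes. */
    #define DEMO_CONST(name) { #name, DEMO_##name }

    static const struct {
        const char *name;
        int value;
    } demo_constants[] = {
        DEMO_CONST(FOO),
        DEMO_CONST(BAR),
        { NULL, 0 },
    };

    int main(void)
    {
        int i;

        for (i = 0; demo_constants[i].name; i++)
            printf("%s = %d\n", demo_constants[i].name,
                   demo_constants[i].value);
        return 0;
    }
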
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 1f7becbe5e18..0467367dc315 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -85,6 +85,11 @@ static void perf_probe_comm_exec(struct perf_evsel *evsel)
85 evsel->attr.comm_exec = 1; 85 evsel->attr.comm_exec = 1;
86} 86}
87 87
88static void perf_probe_context_switch(struct perf_evsel *evsel)
89{
90 evsel->attr.context_switch = 1;
91}
92
88bool perf_can_sample_identifier(void) 93bool perf_can_sample_identifier(void)
89{ 94{
90 return perf_probe_api(perf_probe_sample_identifier); 95 return perf_probe_api(perf_probe_sample_identifier);
@@ -95,6 +100,35 @@ static bool perf_can_comm_exec(void)
95 return perf_probe_api(perf_probe_comm_exec); 100 return perf_probe_api(perf_probe_comm_exec);
96} 101}
97 102
103bool perf_can_record_switch_events(void)
104{
105 return perf_probe_api(perf_probe_context_switch);
106}
107
108bool perf_can_record_cpu_wide(void)
109{
110 struct perf_event_attr attr = {
111 .type = PERF_TYPE_SOFTWARE,
112 .config = PERF_COUNT_SW_CPU_CLOCK,
113 .exclude_kernel = 1,
114 };
115 struct cpu_map *cpus;
116 int cpu, fd;
117
118 cpus = cpu_map__new(NULL);
119 if (!cpus)
120 return false;
121 cpu = cpus->map[0];
122 cpu_map__put(cpus);
123
124 fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
125 if (fd < 0)
126 return false;
127 close(fd);
128
129 return true;
130}
131
98void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) 132void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
99{ 133{
100 struct perf_evsel *evsel; 134 struct perf_evsel *evsel;
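
perf_can_record_switch_events() and perf_can_record_cpu_wide() both answer "does the kernel support X" by attempting a perf_event_open() and checking whether it succeeds. A standalone sketch of probing the context_switch attribute bit this way (assumes linux/perf_event.h from a kernel new enough to define that bit):

    #include <linux/perf_event.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_DUMMY;
        attr.exclude_kernel = 1;        /* works unprivileged */
        attr.context_switch = 1;        /* the bit under test */

        /* Open on the current task, any CPU. */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
            printf("PERF_RECORD_SWITCH not supported\n");
            return 1;
        }
        close(fd);
        printf("PERF_RECORD_SWITCH supported\n");
        return 0;
    }
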
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ed9dc2555ec7..8a4537ee9bc3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -170,7 +170,7 @@ static void perf_session__delete_threads(struct perf_session *session)
170 machine__delete_threads(&session->machines.host); 170 machine__delete_threads(&session->machines.host);
171} 171}
172 172
173static void perf_session_env__delete(struct perf_session_env *env) 173static void perf_session_env__exit(struct perf_env *env)
174{ 174{
175 zfree(&env->hostname); 175 zfree(&env->hostname);
176 zfree(&env->os_release); 176 zfree(&env->os_release);
@@ -180,6 +180,7 @@ static void perf_session_env__delete(struct perf_session_env *env)
180 zfree(&env->cpuid); 180 zfree(&env->cpuid);
181 181
182 zfree(&env->cmdline); 182 zfree(&env->cmdline);
183 zfree(&env->cmdline_argv);
183 zfree(&env->sibling_cores); 184 zfree(&env->sibling_cores);
184 zfree(&env->sibling_threads); 185 zfree(&env->sibling_threads);
185 zfree(&env->numa_nodes); 186 zfree(&env->numa_nodes);
@@ -192,7 +193,7 @@ void perf_session__delete(struct perf_session *session)
192 auxtrace_index__free(&session->auxtrace_index); 193 auxtrace_index__free(&session->auxtrace_index);
193 perf_session__destroy_kernel_maps(session); 194 perf_session__destroy_kernel_maps(session);
194 perf_session__delete_threads(session); 195 perf_session__delete_threads(session);
195 perf_session_env__delete(&session->header.env); 196 perf_session_env__exit(&session->header.env);
196 machines__exit(&session->machines); 197 machines__exit(&session->machines);
197 if (session->file) 198 if (session->file)
198 perf_data_file__close(session->file); 199 perf_data_file__close(session->file);
@@ -332,6 +333,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
332 tool->aux = perf_event__process_aux; 333 tool->aux = perf_event__process_aux;
333 if (tool->itrace_start == NULL) 334 if (tool->itrace_start == NULL)
334 tool->itrace_start = perf_event__process_itrace_start; 335 tool->itrace_start = perf_event__process_itrace_start;
336 if (tool->context_switch == NULL)
337 tool->context_switch = perf_event__process_switch;
335 if (tool->read == NULL) 338 if (tool->read == NULL)
336 tool->read = process_event_sample_stub; 339 tool->read = process_event_sample_stub;
337 if (tool->throttle == NULL) 340 if (tool->throttle == NULL)
@@ -470,6 +473,19 @@ static void perf_event__itrace_start_swap(union perf_event *event,
470 swap_sample_id_all(event, &event->itrace_start + 1); 473 swap_sample_id_all(event, &event->itrace_start + 1);
471} 474}
472 475
476static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
477{
478 if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
479 event->context_switch.next_prev_pid =
480 bswap_32(event->context_switch.next_prev_pid);
481 event->context_switch.next_prev_tid =
482 bswap_32(event->context_switch.next_prev_tid);
483 }
484
485 if (sample_id_all)
486 swap_sample_id_all(event, &event->context_switch + 1);
487}
488
473static void perf_event__throttle_swap(union perf_event *event, 489static void perf_event__throttle_swap(union perf_event *event,
474 bool sample_id_all) 490 bool sample_id_all)
475{ 491{
@@ -632,6 +648,8 @@ static perf_event__swap_op perf_event__swap_ops[] = {
632 [PERF_RECORD_AUX] = perf_event__aux_swap, 648 [PERF_RECORD_AUX] = perf_event__aux_swap,
633 [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap, 649 [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap,
634 [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, 650 [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap,
651 [PERF_RECORD_SWITCH] = perf_event__switch_swap,
652 [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap,
635 [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, 653 [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
636 [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, 654 [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
637 [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, 655 [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -766,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample)
766 784
767 printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); 785 printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
768 786
769 for (i = 0; i < sample->branch_stack->nr; i++) 787 for (i = 0; i < sample->branch_stack->nr; i++) {
770 printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", 788 struct branch_entry *e = &sample->branch_stack->entries[i];
771 i, sample->branch_stack->entries[i].from, 789
772 sample->branch_stack->entries[i].to); 790 printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
791 i, e->from, e->to,
792 e->flags.cycles,
793 e->flags.mispred ? "M" : " ",
794 e->flags.predicted ? "P" : " ",
795 e->flags.abort ? "A" : " ",
796 e->flags.in_tx ? "T" : " ",
797 (unsigned)e->flags.reserved);
798 }
773} 799}
774 800
775static void regs_dump__printf(u64 mask, u64 *regs) 801static void regs_dump__printf(u64 mask, u64 *regs)
@@ -1093,6 +1119,9 @@ static int machines__deliver_event(struct machines *machines,
1093 return tool->aux(tool, event, sample, machine); 1119 return tool->aux(tool, event, sample, machine);
1094 case PERF_RECORD_ITRACE_START: 1120 case PERF_RECORD_ITRACE_START:
1095 return tool->itrace_start(tool, event, sample, machine); 1121 return tool->itrace_start(tool, event, sample, machine);
1122 case PERF_RECORD_SWITCH:
1123 case PERF_RECORD_SWITCH_CPU_WIDE:
1124 return tool->context_switch(tool, event, sample, machine);
1096 default: 1125 default:
1097 ++evlist->stats.nr_unknown_events; 1126 ++evlist->stats.nr_unknown_events;
1098 return -1; 1127 return -1;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 4c65a143a34c..7e3871606df3 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -9,7 +9,7 @@ regex_t parent_regex;
9const char default_parent_pattern[] = "^sys_|^do_page_fault"; 9const char default_parent_pattern[] = "^sys_|^do_page_fault";
10const char *parent_pattern = default_parent_pattern; 10const char *parent_pattern = default_parent_pattern;
11const char default_sort_order[] = "comm,dso,symbol"; 11const char default_sort_order[] = "comm,dso,symbol";
12const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to"; 12const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
13const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; 13const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
14const char default_top_sort_order[] = "dso,symbol"; 14const char default_top_sort_order[] = "dso,symbol";
15const char default_diff_sort_order[] = "dso,symbol"; 15const char default_diff_sort_order[] = "dso,symbol";
@@ -319,6 +319,59 @@ struct sort_entry sort_srcline = {
319 .se_width_idx = HISTC_SRCLINE, 319 .se_width_idx = HISTC_SRCLINE,
320}; 320};
321 321
322/* --sort srcfile */
323
324static char no_srcfile[1];
325
326static char *get_srcfile(struct hist_entry *e)
327{
328 char *sf, *p;
329 struct map *map = e->ms.map;
330
331 sf = get_srcline(map->dso, map__rip_2objdump(map, e->ip),
332 e->ms.sym, true);
333 if (!strcmp(sf, SRCLINE_UNKNOWN))
334 return no_srcfile;
335 p = strchr(sf, ':');
336 if (p && *sf) {
337 *p = 0;
338 return sf;
339 }
340 free(sf);
341 return no_srcfile;
342}
343
344static int64_t
345sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
346{
347 if (!left->srcfile) {
348 if (!left->ms.map)
349 left->srcfile = no_srcfile;
350 else
351 left->srcfile = get_srcfile(left);
352 }
353 if (!right->srcfile) {
354 if (!right->ms.map)
355 right->srcfile = no_srcfile;
356 else
357 right->srcfile = get_srcfile(right);
358 }
359 return strcmp(right->srcfile, left->srcfile);
360}
361
362static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
363 size_t size, unsigned int width)
364{
365 return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile);
366}
367
368struct sort_entry sort_srcfile = {
369 .se_header = "Source File",
370 .se_cmp = sort__srcfile_cmp,
371 .se_snprintf = hist_entry__srcfile_snprintf,
372 .se_width_idx = HISTC_SRCFILE,
373};
374
322/* --sort parent */ 375/* --sort parent */
323 376
324static int64_t 377static int64_t
@@ -526,6 +579,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
526 return repsep_snprintf(bf, size, "%-*.*s", width, width, out); 579 return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
527} 580}
528 581
582static int64_t
583sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
584{
585 return left->branch_info->flags.cycles -
586 right->branch_info->flags.cycles;
587}
588
589static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
590 size_t size, unsigned int width)
591{
592 if (he->branch_info->flags.cycles == 0)
593 return repsep_snprintf(bf, size, "%-*s", width, "-");
594 return repsep_snprintf(bf, size, "%-*hd", width,
595 he->branch_info->flags.cycles);
596}
597
598struct sort_entry sort_cycles = {
599 .se_header = "Basic Block Cycles",
600 .se_cmp = sort__cycles_cmp,
601 .se_snprintf = hist_entry__cycles_snprintf,
602 .se_width_idx = HISTC_CYCLES,
603};
604
529/* --sort daddr_sym */ 605/* --sort daddr_sym */
530static int64_t 606static int64_t
531sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) 607sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
@@ -1173,6 +1249,7 @@ static struct sort_dimension common_sort_dimensions[] = {
1173 DIM(SORT_PARENT, "parent", sort_parent), 1249 DIM(SORT_PARENT, "parent", sort_parent),
1174 DIM(SORT_CPU, "cpu", sort_cpu), 1250 DIM(SORT_CPU, "cpu", sort_cpu),
1175 DIM(SORT_SRCLINE, "srcline", sort_srcline), 1251 DIM(SORT_SRCLINE, "srcline", sort_srcline),
1252 DIM(SORT_SRCFILE, "srcfile", sort_srcfile),
1176 DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), 1253 DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
1177 DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), 1254 DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
1178 DIM(SORT_TRANSACTION, "transaction", sort_transaction), 1255 DIM(SORT_TRANSACTION, "transaction", sort_transaction),
@@ -1190,6 +1267,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
1190 DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), 1267 DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
1191 DIM(SORT_IN_TX, "in_tx", sort_in_tx), 1268 DIM(SORT_IN_TX, "in_tx", sort_in_tx),
1192 DIM(SORT_ABORT, "abort", sort_abort), 1269 DIM(SORT_ABORT, "abort", sort_abort),
1270 DIM(SORT_CYCLES, "cycles", sort_cycles),
1193}; 1271};
1194 1272
1195#undef DIM 1273#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index e97cd476d336..3c2a399f8f5b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -114,6 +114,7 @@ struct hist_entry {
114 }; 114 };
115 }; 115 };
116 char *srcline; 116 char *srcline;
117 char *srcfile;
117 struct symbol *parent; 118 struct symbol *parent;
118 struct rb_root sorted_chain; 119 struct rb_root sorted_chain;
119 struct branch_info *branch_info; 120 struct branch_info *branch_info;
@@ -172,6 +173,7 @@ enum sort_type {
172 SORT_PARENT, 173 SORT_PARENT,
173 SORT_CPU, 174 SORT_CPU,
174 SORT_SRCLINE, 175 SORT_SRCLINE,
176 SORT_SRCFILE,
175 SORT_LOCAL_WEIGHT, 177 SORT_LOCAL_WEIGHT,
176 SORT_GLOBAL_WEIGHT, 178 SORT_GLOBAL_WEIGHT,
177 SORT_TRANSACTION, 179 SORT_TRANSACTION,
@@ -185,6 +187,7 @@ enum sort_type {
185 SORT_MISPREDICT, 187 SORT_MISPREDICT,
186 SORT_ABORT, 188 SORT_ABORT,
187 SORT_IN_TX, 189 SORT_IN_TX,
190 SORT_CYCLES,
188 191
189 /* memory mode specific sort keys */ 192 /* memory mode specific sort keys */
190 __SORT_MEMORY_MODE, 193 __SORT_MEMORY_MODE,
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index c93fb0c5bd0b..fc08248f08ca 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -10,6 +10,8 @@
10 10
11#include "symbol.h" 11#include "symbol.h"
12 12
13bool srcline_full_filename;
14
13#ifdef HAVE_LIBBFD_SUPPORT 15#ifdef HAVE_LIBBFD_SUPPORT
14 16
15/* 17/*
@@ -277,7 +279,9 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
277 if (!addr2line(dso_name, addr, &file, &line, dso)) 279 if (!addr2line(dso_name, addr, &file, &line, dso))
278 goto out; 280 goto out;
279 281
280 if (asprintf(&srcline, "%s:%u", basename(file), line) < 0) { 282 if (asprintf(&srcline, "%s:%u",
283 srcline_full_filename ? file : basename(file),
284 line) < 0) {
281 free(file); 285 free(file);
282 goto out; 286 goto out;
283 } 287 }
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index f2a0d1521e26..415c359de465 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -97,55 +97,6 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel)
97 } 97 }
98} 98}
99 99
100struct perf_counts *perf_counts__new(int ncpus, int nthreads)
101{
102 struct perf_counts *counts = zalloc(sizeof(*counts));
103
104 if (counts) {
105 struct xyarray *values;
106
107 values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values));
108 if (!values) {
109 free(counts);
110 return NULL;
111 }
112
113 counts->values = values;
114 }
115
116 return counts;
117}
118
119void perf_counts__delete(struct perf_counts *counts)
120{
121 if (counts) {
122 xyarray__delete(counts->values);
123 free(counts);
124 }
125}
126
127static void perf_counts__reset(struct perf_counts *counts)
128{
129 xyarray__reset(counts->values);
130}
131
132void perf_evsel__reset_counts(struct perf_evsel *evsel)
133{
134 perf_counts__reset(evsel->counts);
135}
136
137int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads)
138{
139 evsel->counts = perf_counts__new(ncpus, nthreads);
140 return evsel->counts != NULL ? 0 : -ENOMEM;
141}
142
143void perf_evsel__free_counts(struct perf_evsel *evsel)
144{
145 perf_counts__delete(evsel->counts);
146 evsel->counts = NULL;
147}
148
149void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) 100void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
150{ 101{
151 int i; 102 int i;
@@ -238,3 +189,142 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist)
238 perf_evsel__reset_counts(evsel); 189 perf_evsel__reset_counts(evsel);
239 } 190 }
240} 191}
192
193static void zero_per_pkg(struct perf_evsel *counter)
194{
195 if (counter->per_pkg_mask)
196 memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
197}
198
199static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
200{
201 unsigned long *mask = counter->per_pkg_mask;
202 struct cpu_map *cpus = perf_evsel__cpus(counter);
203 int s;
204
205 *skip = false;
206
207 if (!counter->per_pkg)
208 return 0;
209
210 if (cpu_map__empty(cpus))
211 return 0;
212
213 if (!mask) {
214 mask = zalloc(MAX_NR_CPUS);
215 if (!mask)
216 return -ENOMEM;
217
218 counter->per_pkg_mask = mask;
219 }
220
221 s = cpu_map__get_socket(cpus, cpu);
222 if (s < 0)
223 return -1;
224
225 *skip = test_and_set_bit(s, mask) == 1;
226 return 0;
227}
228
229static int
230process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
231 int cpu, int thread,
232 struct perf_counts_values *count)
233{
234 struct perf_counts_values *aggr = &evsel->counts->aggr;
235 static struct perf_counts_values zero;
236 bool skip = false;
237
238 if (check_per_pkg(evsel, cpu, &skip)) {
239 pr_err("failed to read per-pkg counter\n");
240 return -1;
241 }
242
243 if (skip)
244 count = &zero;
245
246 switch (config->aggr_mode) {
247 case AGGR_THREAD:
248 case AGGR_CORE:
249 case AGGR_SOCKET:
250 case AGGR_NONE:
251 if (!evsel->snapshot)
252 perf_evsel__compute_deltas(evsel, cpu, thread, count);
253 perf_counts_values__scale(count, config->scale, NULL);
254 if (config->aggr_mode == AGGR_NONE)
255 perf_stat__update_shadow_stats(evsel, count->values, cpu);
256 break;
257 case AGGR_GLOBAL:
258 aggr->val += count->val;
259 if (config->scale) {
260 aggr->ena += count->ena;
261 aggr->run += count->run;
262 }
263 default:
264 break;
265 }
266
267 return 0;
268}
269
270static int process_counter_maps(struct perf_stat_config *config,
271 struct perf_evsel *counter)
272{
273 int nthreads = thread_map__nr(counter->threads);
274 int ncpus = perf_evsel__nr_cpus(counter);
275 int cpu, thread;
276
277 if (counter->system_wide)
278 nthreads = 1;
279
280 for (thread = 0; thread < nthreads; thread++) {
281 for (cpu = 0; cpu < ncpus; cpu++) {
282 if (process_counter_values(config, counter, cpu, thread,
283 perf_counts(counter->counts, cpu, thread)))
284 return -1;
285 }
286 }
287
288 return 0;
289}
290
291int perf_stat_process_counter(struct perf_stat_config *config,
292 struct perf_evsel *counter)
293{
294 struct perf_counts_values *aggr = &counter->counts->aggr;
295 struct perf_stat *ps = counter->priv;
296 u64 *count = counter->counts->aggr.values;
297 int i, ret;
298
299 aggr->val = aggr->ena = aggr->run = 0;
300 init_stats(ps->res_stats);
301
302 if (counter->per_pkg)
303 zero_per_pkg(counter);
304
305 ret = process_counter_maps(config, counter);
306 if (ret)
307 return ret;
308
309 if (config->aggr_mode != AGGR_GLOBAL)
310 return 0;
311
312 if (!counter->snapshot)
313 perf_evsel__compute_deltas(counter, -1, -1, aggr);
314 perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
315
316 for (i = 0; i < 3; i++)
317 update_stats(&ps->res_stats[i], count[i]);
318
319 if (verbose) {
320 fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
321 perf_evsel__name(counter), count[0], count[1], count[2]);
322 }
323
324 /*
325 * Save the full runtime - to allow normalization during printout:
326 */
327 perf_stat__update_shadow_stats(counter, count, 0);
328
329 return 0;
330}
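
process_counter_values() leans on perf_evsel__compute_deltas() and perf_counts_values__scale(); the latter extrapolates a counter that was only scheduled part of the time by scaling the raw value with time-enabled over time-running. The arithmetic in isolation, following the val/ena/run triple in struct perf_counts_values:

    #include <stdint.h>
    #include <stdio.h>

    /* Extrapolate a partially-scheduled count. */
    static uint64_t scale_count(uint64_t val, uint64_t ena, uint64_t run)
    {
        if (run == 0 || ena == 0)
            return 0;               /* counter never ran */
        if (run < ena)              /* multiplexed: scale up */
            val = (uint64_t)((double)val * ena / run);
        return val;
    }

    int main(void)
    {
        /* Counted 1000 events while scheduled 25% of the time: 4000. */
        printf("%llu\n",
               (unsigned long long)scale_count(1000, 4000000, 1000000));
        return 0;
    }
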
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 1cfbe0a980ac..62448c8175d3 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -33,29 +33,13 @@ enum aggr_mode {
33 AGGR_THREAD, 33 AGGR_THREAD,
34}; 34};
35 35
36struct perf_counts_values { 36struct perf_stat_config {
37 union { 37 enum aggr_mode aggr_mode;
38 struct { 38 bool scale;
39 u64 val; 39 FILE *output;
40 u64 ena; 40 unsigned int interval;
41 u64 run;
42 };
43 u64 values[3];
44 };
45}; 41};
46 42
47struct perf_counts {
48 s8 scaled;
49 struct perf_counts_values aggr;
50 struct xyarray *values;
51};
52
53static inline struct perf_counts_values*
54perf_counts(struct perf_counts *counts, int cpu, int thread)
55{
56 return xyarray__entry(counts->values, cpu, thread);
57}
58
59void update_stats(struct stats *stats, u64 val); 43void update_stats(struct stats *stats, u64 val);
60double avg_stats(struct stats *stats); 44double avg_stats(struct stats *stats);
61double stddev_stats(struct stats *stats); 45double stddev_stats(struct stats *stats);
@@ -89,13 +73,6 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
89void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, 73void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
90 double avg, int cpu, enum aggr_mode aggr); 74 double avg, int cpu, enum aggr_mode aggr);
91 75
92struct perf_counts *perf_counts__new(int ncpus, int nthreads);
93void perf_counts__delete(struct perf_counts *counts);
94
95void perf_evsel__reset_counts(struct perf_evsel *evsel);
96int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads);
97void perf_evsel__free_counts(struct perf_evsel *evsel);
98
99void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); 76void perf_evsel__reset_stat_priv(struct perf_evsel *evsel);
100int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); 77int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel);
101void perf_evsel__free_stat_priv(struct perf_evsel *evsel); 78void perf_evsel__free_stat_priv(struct perf_evsel *evsel);
@@ -109,4 +86,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
109int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); 86int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
110void perf_evlist__free_stats(struct perf_evlist *evlist); 87void perf_evlist__free_stats(struct perf_evlist *evlist);
111void perf_evlist__reset_stats(struct perf_evlist *evlist); 88void perf_evlist__reset_stats(struct perf_evlist *evlist);
89
90int perf_stat_process_counter(struct perf_stat_config *config,
91 struct perf_evsel *counter);
112#endif 92#endif
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 6afd6106ceb5..fc8781de62db 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -357,3 +357,42 @@ void *memdup(const void *src, size_t len)
357 357
358 return p; 358 return p;
359} 359}
360
361char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
362{
363 /*
364 * FIXME: replace this with an expression using log10() when we
365 * find a suitable implementation, maybe the one in the dvb drivers...
366 *
367 * "%s == %d || " = log10(MAXINT) * 2 + 8 chars for the operators
368 */
369 size_t size = nints * 28 + 1; /* \0 */
370 size_t i, printed = 0;
371 char *expr = malloc(size);
372
373 if (expr) {
374 const char *or_and = "||", *eq_neq = "==";
375 char *e = expr;
376
377 if (!in) {
378 or_and = "&&";
379 eq_neq = "!=";
380 }
381
382 for (i = 0; i < nints; ++i) {
383 if (printed == size)
384 goto out_err_overflow;
385
386 if (i > 0)
387 printed += snprintf(e + printed, size - printed, " %s ", or_and);
388 printed += scnprintf(e + printed, size - printed,
389 "%s %s %d", var, eq_neq, ints[i]);
390 }
391 }
392
393 return expr;
394
395out_err_overflow:
396 free(expr);
397 return NULL;
398}
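
The generated expression is easiest to see by example: 'in' mode ORs together equality tests, 'out' mode ANDs together inequalities. A small caller (link against perf's util/string.o, which provides the function):

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints,
                                   int *ints);

    int main(void)
    {
        int pids[] = { 1, 42 };
        char *expr = asprintf_expr_inout_ints("pid", true, 2, pids);

        if (expr)
            printf("%s\n", expr);   /* "pid == 1 || pid == 42" */
        free(expr);

        expr = asprintf_expr_inout_ints("pid", false, 2, pids);
        if (expr)
            printf("%s\n", expr);   /* "pid != 1 && pid != 42" */
        free(expr);
        return 0;
    }
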
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
index 71f9d102b96f..bdf98f6f27bb 100644
--- a/tools/perf/util/strlist.c
+++ b/tools/perf/util/strlist.c
@@ -72,7 +72,7 @@ int strlist__load(struct strlist *slist, const char *filename)
72 FILE *fp = fopen(filename, "r"); 72 FILE *fp = fopen(filename, "r");
73 73
74 if (fp == NULL) 74 if (fp == NULL)
75 return errno; 75 return -errno;
76 76
77 while (fgets(entry, sizeof(entry), fp) != NULL) { 77 while (fgets(entry, sizeof(entry), fp) != NULL) {
78 const size_t len = strlen(entry); 78 const size_t len = strlen(entry);
@@ -108,43 +108,70 @@ struct str_node *strlist__find(struct strlist *slist, const char *entry)
108 return snode; 108 return snode;
109} 109}
110 110
111static int strlist__parse_list_entry(struct strlist *slist, const char *s) 111static int strlist__parse_list_entry(struct strlist *slist, const char *s,
112 const char *subst_dir)
112{ 113{
114 int err;
115 char *subst = NULL;
116
113 if (strncmp(s, "file://", 7) == 0) 117 if (strncmp(s, "file://", 7) == 0)
114 return strlist__load(slist, s + 7); 118 return strlist__load(slist, s + 7);
115 119
116 return strlist__add(slist, s); 120 if (subst_dir) {
121 err = -ENOMEM;
122 if (asprintf(&subst, "%s/%s", subst_dir, s) < 0)
123 goto out;
124
125 if (access(subst, F_OK) == 0) {
126 err = strlist__load(slist, subst);
127 goto out;
128 }
129 }
130
131 err = strlist__add(slist, s);
132out:
133 free(subst);
134 return err;
117} 135}
118 136
119int strlist__parse_list(struct strlist *slist, const char *s) 137static int strlist__parse_list(struct strlist *slist, const char *s, const char *subst_dir)
120{ 138{
121 char *sep; 139 char *sep;
122 int err; 140 int err;
123 141
124 while ((sep = strchr(s, ',')) != NULL) { 142 while ((sep = strchr(s, ',')) != NULL) {
125 *sep = '\0'; 143 *sep = '\0';
126 err = strlist__parse_list_entry(slist, s); 144 err = strlist__parse_list_entry(slist, s, subst_dir);
127 *sep = ','; 145 *sep = ',';
128 if (err != 0) 146 if (err != 0)
129 return err; 147 return err;
130 s = sep + 1; 148 s = sep + 1;
131 } 149 }
132 150
133 return *s ? strlist__parse_list_entry(slist, s) : 0; 151 return *s ? strlist__parse_list_entry(slist, s, subst_dir) : 0;
134} 152}
135 153
136struct strlist *strlist__new(bool dupstr, const char *list) 154struct strlist *strlist__new(const char *list, const struct strlist_config *config)
137{ 155{
138 struct strlist *slist = malloc(sizeof(*slist)); 156 struct strlist *slist = malloc(sizeof(*slist));
139 157
140 if (slist != NULL) { 158 if (slist != NULL) {
159 bool dupstr = true;
160 const char *dirname = NULL;
161
162 if (config) {
163 dupstr = !config->dont_dupstr;
164 dirname = config->dirname;
165 }
166
141 rblist__init(&slist->rblist); 167 rblist__init(&slist->rblist);
142 slist->rblist.node_cmp = strlist__node_cmp; 168 slist->rblist.node_cmp = strlist__node_cmp;
143 slist->rblist.node_new = strlist__node_new; 169 slist->rblist.node_new = strlist__node_new;
144 slist->rblist.node_delete = strlist__node_delete; 170 slist->rblist.node_delete = strlist__node_delete;
145 171
146 slist->dupstr = dupstr; 172 slist->dupstr = dupstr;
147 if (list && strlist__parse_list(slist, list) != 0) 173
174 if (list && strlist__parse_list(slist, list, dirname) != 0)
148 goto out_error; 175 goto out_error;
149 } 176 }
150 177
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
index 5c7f87069d9c..297565aa7535 100644
--- a/tools/perf/util/strlist.h
+++ b/tools/perf/util/strlist.h
@@ -16,7 +16,12 @@ struct strlist {
16 bool dupstr; 16 bool dupstr;
17}; 17};
18 18
19struct strlist *strlist__new(bool dupstr, const char *slist); 19struct strlist_config {
20 bool dont_dupstr;
21 const char *dirname;
22};
23
24struct strlist *strlist__new(const char *slist, const struct strlist_config *config);
20void strlist__delete(struct strlist *slist); 25void strlist__delete(struct strlist *slist);
21 26
22void strlist__remove(struct strlist *slist, struct str_node *sn); 27void strlist__remove(struct strlist *slist, struct str_node *sn);
@@ -74,6 +79,4 @@ static inline struct str_node *strlist__next(struct str_node *sn)
74#define strlist__for_each_safe(pos, n, slist) \ 79#define strlist__for_each_safe(pos, n, slist) \
75 for (pos = strlist__first(slist), n = strlist__next(pos); pos;\ 80 for (pos = strlist__first(slist), n = strlist__next(pos); pos;\
76 pos = n, n = strlist__next(n)) 81 pos = n, n = strlist__next(n))
77
78int strlist__parse_list(struct strlist *slist, const char *s);
79#endif /* __PERF_STRLIST_H */ 82#endif /* __PERF_STRLIST_H */
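
With the new signature the list string comes first and an optional config second; passing a NULL config keeps the old duplicate-strings behaviour, while dirname lets a bare entry be resolved as a file under that directory before falling back to a literal entry. A usage sketch with perf's strlist API (the /tmp/filters path is an assumption for illustration):

    #include "strlist.h"

    static struct strlist *load_filter_list(void)
    {
        struct strlist_config config = {
            .dont_dupstr = false,       /* duplicate entry strings */
            .dirname = "/tmp/filters",  /* assumed directory */
        };

        /*
         * "syscalls" is tried as /tmp/filters/syscalls first; if no
         * such file exists it is added as a literal entry. The
         * "file://" entry is always loaded from the given path.
         */
        return strlist__new("syscalls,file:///etc/perf.list", &config);
    }
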
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 65f7e389ae09..53bb5f59ec58 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -38,7 +38,7 @@ static inline char *bfd_demangle(void __maybe_unused *v,
38#endif 38#endif
39 39
40#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT 40#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
41static int elf_getphdrnum(Elf *elf, size_t *dst) 41int elf_getphdrnum(Elf *elf, size_t *dst)
42{ 42{
43 GElf_Ehdr gehdr; 43 GElf_Ehdr gehdr;
44 GElf_Ehdr *ehdr; 44 GElf_Ehdr *ehdr;
@@ -875,6 +875,17 @@ int dso__load_sym(struct dso *dso, struct map *map,
875 } 875 }
876 } 876 }
877 877
878 /*
879 * Handle any relocation of vdso necessary because older kernels
880 * attempted to prelink vdso to its virtual address.
881 */
882 if (dso__is_vdso(dso)) {
883 GElf_Shdr tshdr;
884
885 if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
886 map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset;
887 }
888
878 dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); 889 dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
879 /* 890 /*
880 * Initial kernel and module mappings do not map to the dso. For 891 * Initial kernel and module mappings do not map to the dso. For
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 60f11414bb5c..1f97ffb158a6 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -441,10 +441,25 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
 	return &s->sym;
 }
 
+void dso__reset_find_symbol_cache(struct dso *dso)
+{
+	enum map_type type;
+
+	for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) {
+		dso->last_find_result[type].addr = 0;
+		dso->last_find_result[type].symbol = NULL;
+	}
+}
+
 struct symbol *dso__find_symbol(struct dso *dso,
 				enum map_type type, u64 addr)
 {
-	return symbols__find(&dso->symbols[type], addr);
+	if (dso->last_find_result[type].addr != addr) {
+		dso->last_find_result[type].addr = addr;
+		dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr);
+	}
+
+	return dso->last_find_result[type].symbol;
 }
 
 struct symbol *dso__first_symbol(struct dso *dso, enum map_type type)
@@ -1133,8 +1148,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 
 	fd = open(kcore_filename, O_RDONLY);
 	if (fd < 0) {
-		pr_err("%s requires CAP_SYS_RAWIO capability to access.\n",
-		       kcore_filename);
+		pr_debug("Failed to open %s. Note /proc/kcore requires CAP_SYS_RAWIO capability to access.\n",
+			 kcore_filename);
 		return -EINVAL;
 	}
 
@@ -1838,7 +1853,7 @@ static void vmlinux_path__exit(void)
 	zfree(&vmlinux_path);
 }
 
-static int vmlinux_path__init(struct perf_session_env *env)
+static int vmlinux_path__init(struct perf_env *env)
 {
 	struct utsname uts;
 	char bf[PATH_MAX];
@@ -1906,7 +1921,7 @@ int setup_list(struct strlist **list, const char *list_str,
 	if (list_str == NULL)
 		return 0;
 
-	*list = strlist__new(true, list_str);
+	*list = strlist__new(list_str, NULL);
 	if (!*list) {
 		pr_err("problems parsing %s list\n", list_name);
 		return -1;
@@ -1949,7 +1964,7 @@ static bool symbol__read_kptr_restrict(void)
 	return value;
 }
 
-int symbol__init(struct perf_session_env *env)
+int symbol__init(struct perf_env *env)
 {
 	const char *symfs;
 
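
dso__find_symbol() now memoizes its last result, which helps when many
consecutive samples resolve to the same address; the flip side is that the
cache must be invalidated whenever the dso's symbol trees change.  A hedged
usage sketch with the names from the hunks above:

	/* hot path: a repeated address is served from the one-entry cache */
	struct symbol *sym = dso__find_symbol(dso, MAP__FUNCTION, addr);

	/* ... after (re)loading symbols for this dso, drop the cached
	 * result so a pointer into the old tree is never returned: */
	dso__reset_find_symbol_cache(dso);
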
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index b98ce51af142..440ba8ae888f 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -106,7 +106,8 @@ struct symbol_conf {
 			filter_relative,
 			show_hist_headers,
 			branch_callstack,
-			has_filter;
+			has_filter,
+			show_ref_callgraph;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
@@ -251,8 +252,8 @@ int modules__parse(const char *filename, void *arg,
 int filename__read_debuglink(const char *filename, char *debuglink,
 			     size_t size);
 
-struct perf_session_env;
-int symbol__init(struct perf_session_env *env);
+struct perf_env;
+int symbol__init(struct perf_env *env);
 void symbol__exit(void);
 void symbol__elf_init(void);
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 292ae2c90e06..6ec3c5ca438f 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -195,7 +195,8 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
 	pid_t pid, prev_pid = INT_MAX;
 	char *end_ptr;
 	struct str_node *pos;
-	struct strlist *slist = strlist__new(false, pid_str);
+	struct strlist_config slist_config = { .dont_dupstr = true, };
+	struct strlist *slist = strlist__new(pid_str, &slist_config);
 
 	if (!slist)
 		return NULL;
@@ -265,13 +266,14 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
 	pid_t tid, prev_tid = INT_MAX;
 	char *end_ptr;
 	struct str_node *pos;
+	struct strlist_config slist_config = { .dont_dupstr = true, };
 	struct strlist *slist;
 
 	/* perf-stat expects threads to be generated even if tid not given */
 	if (!tid_str)
 		return thread_map__new_dummy();
 
-	slist = strlist__new(false, tid_str);
+	slist = strlist__new(tid_str, &slist_config);
 	if (!slist)
 		return NULL;
 
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index c307dd438286..cab8cc24831b 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -46,6 +46,7 @@ struct perf_tool {
 			lost_samples,
 			aux,
 			itrace_start,
+			context_switch,
 			throttle,
 			unthrottle;
 	event_attr_op	attr;
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index eb72716017ac..22245986e59e 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -341,20 +341,14 @@ out:
 
 static int record_proc_kallsyms(void)
 {
-	unsigned int size;
-	const char *path = "/proc/kallsyms";
-	struct stat st;
-	int ret, err = 0;
-
-	ret = stat(path, &st);
-	if (ret < 0) {
-		/* not found */
-		size = 0;
-		if (write(output_fd, &size, 4) != 4)
-			err = -EIO;
-		return err;
-	}
-	return record_file(path, 4);
+	unsigned long long size = 0;
+	/*
+	 * Just to keep older perf.data file parsers happy, record a zero
+	 * sized kallsyms file, i.e. do the same thing that was done when
+	 * /proc/kallsyms (or something specified via --kallsyms, in a
+	 * different path) couldn't be read.
+	 */
+	return write(output_fd, &size, 4) != 4 ? -EIO : 0;
 }
 
 static int record_ftrace_printk(void)
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index d4957418657e..8ff7d620d942 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -135,36 +135,6 @@ void event_format__print(struct event_format *event,
 	return event_format__fprintf(event, cpu, data, size, stdout);
 }
 
-void parse_proc_kallsyms(struct pevent *pevent,
-			 char *file, unsigned int size __maybe_unused)
-{
-	unsigned long long addr;
-	char *func;
-	char *line;
-	char *next = NULL;
-	char *addr_str;
-	char *mod;
-	char *fmt = NULL;
-
-	line = strtok_r(file, "\n", &next);
-	while (line) {
-		mod = NULL;
-		addr_str = strtok_r(line, " ", &fmt);
-		addr = strtoull(addr_str, NULL, 16);
-		/* skip character */
-		strtok_r(NULL, " ", &fmt);
-		func = strtok_r(NULL, "\t", &fmt);
-		mod = strtok_r(NULL, "]", &fmt);
-		/* truncate the extra '[' */
-		if (mod)
-			mod = mod + 1;
-
-		pevent_register_function(pevent, func, addr, mod);
-
-		line = strtok_r(NULL, "\n", &next);
-	}
-}
-
 void parse_ftrace_printk(struct pevent *pevent,
 			 char *file, unsigned int size __maybe_unused)
 {
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 54d9e9b548a8..b67a0ccf5ab9 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -162,25 +162,23 @@ out:
 static int read_proc_kallsyms(struct pevent *pevent)
 {
 	unsigned int size;
-	char *buf;
 
 	size = read4(pevent);
 	if (!size)
 		return 0;
-
-	buf = malloc(size + 1);
-	if (buf == NULL)
-		return -1;
-
-	if (do_read(buf, size) < 0) {
-		free(buf);
-		return -1;
-	}
-	buf[size] = '\0';
-
-	parse_proc_kallsyms(pevent, buf, size);
-
-	free(buf);
+	/*
+	 * Just skip it, now that we configure libtraceevent to use the
+	 * tools/perf/ symbol resolver.
+	 *
+	 * We need to skip it so that we can continue parsing old perf.data
+	 * files, that contains this /proc/kallsyms payload.
+	 *
+	 * Newer perf.data files will have just the 4-bytes zeros "kallsyms
+	 * payload", so that older tools can continue reading it and interpret
+	 * it as "no kallsyms payload is present".
+	 */
+	lseek(input_fd, size, SEEK_CUR);
+	trace_data_size += size;
 	return 0;
 }
 
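
The write and read sides above agree on one framing for the kallsyms
payload in the tracing data section, which can be summarized as (an
inference from the two hunks, not a separate format document):

	/*
	 *   +----------+-------------------------------------+
	 *   | u32 size | size bytes of /proc/kallsyms text   |
	 *   +----------+-------------------------------------+
	 *
	 * new writers: size == 0, no payload follows;
	 * readers:     skip any non-zero payload (old files) and rely on
	 *              the registered perf symbol resolver instead.
	 */
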
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 6322d37164c5..b90e646c7a91 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <traceevent/event-parse.h>
 #include "trace-event.h"
+#include "machine.h"
 #include "util.h"
 
 /*
@@ -19,6 +20,7 @@
  * there.
  */
 static struct trace_event tevent;
+static bool tevent_initialized;
 
 int trace_event__init(struct trace_event *t)
 {
@@ -32,6 +34,31 @@ int trace_event__init(struct trace_event *t)
 	return pevent ? 0 : -1;
 }
 
+static int trace_event__init2(void)
+{
+	int be = traceevent_host_bigendian();
+	struct pevent *pevent;
+
+	if (trace_event__init(&tevent))
+		return -1;
+
+	pevent = tevent.pevent;
+	pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
+	pevent_set_file_bigendian(pevent, be);
+	pevent_set_host_bigendian(pevent, be);
+	tevent_initialized = true;
+	return 0;
+}
+
+int trace_event__register_resolver(struct machine *machine,
+				   pevent_func_resolver_t *func)
+{
+	if (!tevent_initialized && trace_event__init2())
+		return -1;
+
+	return pevent_set_function_resolver(tevent.pevent, func, machine);
+}
+
 void trace_event__cleanup(struct trace_event *t)
 {
 	traceevent_unload_plugins(t->plugin_list, t->pevent);
@@ -62,21 +89,8 @@ tp_format(const char *sys, const char *name)
 struct event_format*
 trace_event__tp_format(const char *sys, const char *name)
 {
-	static bool initialized;
-
-	if (!initialized) {
-		int be = traceevent_host_bigendian();
-		struct pevent *pevent;
-
-		if (trace_event__init(&tevent))
-			return NULL;
-
-		pevent = tevent.pevent;
-		pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
-		pevent_set_file_bigendian(pevent, be);
-		pevent_set_host_bigendian(pevent, be);
-		initialized = true;
-	}
+	if (!tevent_initialized && trace_event__init2())
+		return NULL;
 
 	return tp_format(sys, name);
 }
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index d5168f0be4ec..da6cc4cc2a4f 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -18,6 +18,8 @@ struct trace_event {
 
 int trace_event__init(struct trace_event *t);
 void trace_event__cleanup(struct trace_event *t);
+int trace_event__register_resolver(struct machine *machine,
+				   pevent_func_resolver_t *func);
 struct event_format*
 trace_event__tp_format(const char *sys, const char *name);
 
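
A plausible call site for the new hook, shown as a hedged sketch: the
resolver name machine__resolve_kernel_addr() and the wrapper function are
assumptions (the callback merely has to match libtraceevent's
pevent_func_resolver_t signature):

	#include "trace-event.h"
	#include "machine.h"

	static int setup_kernel_symbol_resolver(struct machine *machine)
	{
		/* lazily initializes tevent via trace_event__init2(), then
		 * lets libtraceevent resolve kernel addresses through
		 * perf's own symbol tables instead of a kallsyms copy */
		return trace_event__register_resolver(machine,
						      machine__resolve_kernel_addr);
	}
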
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index edc2d633b332..7acafb3c5592 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -34,6 +34,7 @@ bool test_attr__enabled;
 bool perf_host = true;
 bool perf_guest = false;
 
+char tracing_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing";
 char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events";
 
 void event_attr_init(struct perf_event_attr *attr)
@@ -391,6 +392,8 @@ void set_term_quiet_input(struct termios *old)
 
 static void set_tracing_events_path(const char *tracing, const char *mountpoint)
 {
+	snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
+		 mountpoint, tracing);
 	snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
 		 mountpoint, tracing, "events");
 }
@@ -436,66 +439,14 @@ const char *perf_debugfs_mount(const char *mountpoint)
 
 void perf_debugfs_set_path(const char *mntpt)
 {
-	snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt);
 	set_tracing_events_path("tracing/", mntpt);
 }
 
-static const char *find_tracefs(void)
-{
-	const char *path = __perf_tracefs_mount(NULL);
-
-	return path;
-}
-
-static const char *find_debugfs(void)
-{
-	const char *path = __perf_debugfs_mount(NULL);
-
-	if (!path)
-		fprintf(stderr, "Your kernel does not support the debugfs filesystem");
-
-	return path;
-}
-
-/*
- * Finds the path to the debugfs/tracing
- * Allocates the string and stores it.
- */
-const char *find_tracing_dir(void)
-{
-	const char *tracing_dir = "";
-	static char *tracing;
-	static int tracing_found;
-	const char *debugfs;
-
-	if (tracing_found)
-		return tracing;
-
-	debugfs = find_tracefs();
-	if (!debugfs) {
-		tracing_dir = "/tracing";
-		debugfs = find_debugfs();
-		if (!debugfs)
-			return NULL;
-	}
-
-	if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0)
-		return NULL;
-
-	tracing_found = 1;
-	return tracing;
-}
-
 char *get_tracing_file(const char *name)
 {
-	const char *tracing;
 	char *file;
 
-	tracing = find_tracing_dir();
-	if (!tracing)
-		return NULL;
-
-	if (asprintf(&file, "%s/%s", tracing, name) < 0)
+	if (asprintf(&file, "%s/%s", tracing_path, name) < 0)
 		return NULL;
 
 	return file;
@@ -566,6 +517,96 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
 	return (unsigned long) -1;
 }
 
+int get_stack_size(const char *str, unsigned long *_size)
+{
+	char *endptr;
+	unsigned long size;
+	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
+
+	size = strtoul(str, &endptr, 0);
+
+	do {
+		if (*endptr)
+			break;
+
+		size = round_up(size, sizeof(u64));
+		if (!size || size > max_size)
+			break;
+
+		*_size = size;
+		return 0;
+
+	} while (0);
+
+	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
+	       max_size, str);
+	return -1;
+}
+
+int parse_callchain_record(const char *arg, struct callchain_param *param)
+{
+	char *tok, *name, *saveptr = NULL;
+	char *buf;
+	int ret = -1;
+
+	/* We need buffer that we know we can write to. */
+	buf = malloc(strlen(arg) + 1);
+	if (!buf)
+		return -ENOMEM;
+
+	strcpy(buf, arg);
+
+	tok = strtok_r((char *)buf, ",", &saveptr);
+	name = tok ? : (char *)buf;
+
+	do {
+		/* Framepointer style */
+		if (!strncmp(name, "fp", sizeof("fp"))) {
+			if (!strtok_r(NULL, ",", &saveptr)) {
+				param->record_mode = CALLCHAIN_FP;
+				ret = 0;
+			} else
+				pr_err("callchain: No more arguments "
+				       "needed for --call-graph fp\n");
+			break;
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+		/* Dwarf style */
+		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
+			const unsigned long default_stack_dump_size = 8192;
+
+			ret = 0;
+			param->record_mode = CALLCHAIN_DWARF;
+			param->dump_size = default_stack_dump_size;
+
+			tok = strtok_r(NULL, ",", &saveptr);
+			if (tok) {
+				unsigned long size = 0;
+
+				ret = get_stack_size(tok, &size);
+				param->dump_size = size;
+			}
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
+		} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
+			if (!strtok_r(NULL, ",", &saveptr)) {
+				param->record_mode = CALLCHAIN_LBR;
+				ret = 0;
+			} else
+				pr_err("callchain: No more arguments "
+				       "needed for --call-graph lbr\n");
+			break;
+		} else {
+			pr_err("callchain: Unknown --call-graph option "
+			       "value: %s\n", arg);
+			break;
+		}
+
+	} while (0);
+
+	free(buf);
+	return ret;
+}
+
 int filename__read_str(const char *filename, char **buf, size_t *sizep)
 {
 	size_t size = 0, alloc_size = 0;
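
parse_callchain_record(), now shared via util.c, turns a --call-graph
value into a callchain_param.  A minimal usage sketch (values
illustrative):

	struct callchain_param param = { .record_mode = CALLCHAIN_NONE, };

	if (parse_callchain_record("dwarf,4096", &param) == 0) {
		/* param.record_mode == CALLCHAIN_DWARF and
		 * param.dump_size == 4096: get_stack_size() accepted the
		 * value since it is a multiple of sizeof(u64) and below
		 * the USHRT_MAX-derived cap */
	}
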
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 8bce58b47a82..291be1d84bc3 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -83,10 +83,10 @@
 extern const char *graph_line;
 extern const char *graph_dotted_line;
 extern char buildid_dir[];
+extern char tracing_path[];
 extern char tracing_events_path[];
 extern void perf_debugfs_set_path(const char *mountpoint);
 const char *perf_debugfs_mount(const char *mountpoint);
-const char *find_tracing_dir(void);
 char *get_tracing_file(const char *name);
 void put_tracing_file(char *file);
 
@@ -318,6 +318,7 @@ static inline int path__join3(char *bf, size_t size,
 struct dso;
 struct symbol;
 
+extern bool srcline_full_filename;
 char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
 		  bool show_sym);
 void free_srcline(char *srcline);
@@ -339,4 +340,18 @@ int gzip_decompress_to_file(const char *input, int output_fd);
 int lzma_decompress_to_file(const char *input, int output_fd);
 #endif
 
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints);
+
+static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints)
+{
+	return asprintf_expr_inout_ints(var, true, nints, ints);
+}
+
+static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints)
+{
+	return asprintf_expr_inout_ints(var, false, nints, ints);
+}
+
+int get_stack_size(const char *str, unsigned long *_size);
+
 #endif /* GIT_COMPAT_UTIL_H */
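
The asprintf_expr_*_ints() helpers build membership filter expressions
from an integer array.  Assuming the "==/||" and "!=/&&" expansion the
in/out naming suggests, a sketch:

	int ids[] = { 1, 5, 7 };
	char *in  = asprintf_expr_in_ints("id", ARRAY_SIZE(ids), ids);
	char *out = asprintf_expr_not_in_ints("id", ARRAY_SIZE(ids), ids);

	/* assumed results:
	 *   in  -> "id == 1 || id == 5 || id == 7"
	 *   out -> "id != 1 && id != 5 && id != 7"
	 */
	free(in);
	free(out);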